1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 AVX512 instruction set, defining the
11// instructions, and properties of the instructions which are needed for code
12// generation, machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // The register class holding values of this vector type, e.g. VR512.
  RegisterClass RC = rc;
  // The element value type, e.g. i32 for v16i32.
  ValueType EltVT = eltvt;
  // Number of elements (1 for the scalar pseudo-vector types).
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // String form of the element type, e.g. "i32" or "f64".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits as a string, e.g. "32" for v16i32
  // (derived by stripping the "i"/"f" prefix from EltTypeName).
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  // Size of the element type in bits as an integer.
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Scalar memory operand matching the element type, e.g. f32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  //       due to load promotion during legalization
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                       !if (!eq (Size, 512), "v8i64",
                                            VTName))), VTName));

  // Aligned-load variant of LdFrag; same i64 promotion applies.
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                                         !if (!eq (TypeVariantName, "i"),
                                               !if (!eq (Size, 128), "v2i64",
                                               !if (!eq (Size, 256), "v4i64",
                                               !if (!eq (Size, 512), "v8i64",
                                                   VTName))), VTName));

  // Scalar load of one element, e.g. loadf32.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Complex pattern used by scalar FP intrinsics to match either a register
  // or a scalar load (sse_load_f32/sse_load_f64); unset for integer types.
  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  Only set when
  // NumElts < 16 (the !srl-by-4 test); the guard leaves it uninitialized
  // otherwise.  NOTE(review): the original comment said "NumElts <= 8" which
  // does not match the check - confirm the intended bound.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Subregister index for extracting/inserting this VT from a ZMM register;
  // only meaningful for the 128/256-bit sizes.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain used for domain-crossing heuristics.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // Scalar FP register class matching the element type.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64. This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32.  This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // Suffix appended to instruction names to select the EVEX length variant.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
122
// 512-bit vector types (RC = VR512). The suffix string is the mnemonic
// element suffix: b/w/d/q for integers, ps/pd for floating point.
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
151
// Bundles the 512/256/128-bit X86VectorVTInfo records for one element type,
// so multiclasses can define all three VL length variants from one argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
158
// Per-element-type bundles of the 512/256/128-bit infos defined above.
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;
171
// Analogue of X86VectorVTInfo for mask (k-register) vector types: the plain
// mask register class, its write-mask subclass, and the vNi1 value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}
178
// Mask-register info for each supported mask width (1..64 bits).
def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
186
// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
// Emits three records: NAME (unmasked), NAME#k (merge-masking, EVEX_K) and
// NAME#kz (zero-masking, EVEX_KZ), sharing opcode/format/operands but with
// {%kN} / {%kN}{z} decoration added to the assembly strings.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
229
230
// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three ISel pattern lists from a single RHS dag: unmasked
// (set dst, RHS), merge-masked (set dst, MaskingRHS), and zero-masked
// (Select mask, RHS, ImmAllZerosV), then forwards to the _custom multiclass.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;
251
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          // Merge-masking adds a tied pass-through ($src0)
                          // plus the mask in front of the normal inputs.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          // Zero-masking only adds the mask.
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;
272
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// Unlike AVX512_maskable_split, the same RHS dag is used for all variants.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;
290
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Scalar ops select with X86selects (element 0 only) rather than vselect,
// and the masked form is never commutable (IsKCommutable is forced to 0).
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, IsCommutable, 0, IsCommutable, X86selects>;
300
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          // MaskOnly suppresses the unmasked pattern.
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;
321
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;
339
// Scalar flavor of AVX512_maskable_3src: identical wiring but selects with
// X86selects (element 0 only) instead of vselect.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;
350
// Assembler-only variant of AVX512_maskable: generates the unmasked, masked
// and zero-masked forms with the supplied pattern on the unmasked record only
// (the masked records get empty pattern lists).
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;
361
// Assembler-only variant of AVX512_maskable_3src: $src1 is the tied
// pass-through operand and the masked records carry no ISel patterns.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;
373
// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Generates only NAME and NAME#k: zero-masking makes no sense when the
// destination is itself a mask register.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
}
395
// Builds the (set KRC:$dst, ...) pattern lists for mask-producing
// instructions from RHS/MaskingRHS and forwards to the _custom_cmp class.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
407
// Standard mask-producing instruction: the masked variant ANDs the compare
// result with the incoming write mask.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS), IsCommutable>;
416
// Assembler-only mask-producing instruction: both variants carry empty
// pattern lists (used for alternative assembly forms).
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm> :
   AVX512_maskable_custom_cmp<O, F, Outs,
                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
                             AttSrcAsm, IntelSrcAsm, [], []>;
423
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// Used for logic ops, which take a separate MaskedRHS dag for both masked
// pattern forms.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskedRHS,
                           bit IsCommutable = 0, SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS,
                                        _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable>;
443
444
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// 512-bit all-zeros idiom.
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
// 512-bit all-ones idiom.
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
457
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 16 x i32 elements selected between all-ones and all-zeros by the mask.
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
// 8 x i64 variant; the constants are bitcast from the canonical v16i32 form.
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (bc_v8i64 (v16i32 immAllOnesV)),
                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
}
473
// 128/256-bit all-zeros pseudos, mirroring AVX512_512_SET0 above for the
// EVEX-only XMM/YMM register classes.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
481
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  // Scalar +0.0 in an FR32X register.
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  // Scalar +0.0 in an FR64X register.
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}
491
492//===----------------------------------------------------------------------===//
493// AVX-512 - VECTOR INSERT
494//
495
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines the register-register (rr) and register-memory (rm) forms of one
// VINSERTxNN instruction, with masked/zero-masked variants via
// AVX512_maskable_split.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   // Mnemonic, e.g. "vinsertf32x4" for f32 x 4 elements.
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   // Compressed-displacement scaling by the inserted
                   // subvector's element size and tuple form.
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
530
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
537
// Lowering patterns that map an insert_subvector-style node onto an existing
// VINSERT instruction (named by InstrStr) for alternative element types.
// INSERT_get_vinsert_imm converts the insertion index into the instruction's
// immediate operand.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Register source form.
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory source form: fold the load into the instruction.
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
557
// Instantiates the whole VINSERT family for one element-type pair:
// 32x4/64x4 forms for all AVX512 targets, plus the DQI-only 64x2/32x8 forms
// which are restricted to masked use (null_frag unmasked pattern).
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}
601
602// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
603defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
604defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
605
606// Codegen patterns reusing the 32x4/64x4 instructions for the alternative
// (same overall width) element types.
607// Even with AVX512DQ we'll still use these for unmasked operations.
608defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
609              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
610defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
611              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
612
613defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
614              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
615defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
616              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
617
618defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
619              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
620defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
621              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
622
623// Codegen pattern with the alternative types insert VEC128 into VEC256
624defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
625              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
626defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
627              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
628// Codegen pattern with the alternative types insert VEC128 into VEC512
629defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
630              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
631defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
632               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
633// Codegen pattern with the alternative types insert VEC256 into VEC512
634defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
635              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
636defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
637              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
638
639
// Masked-insert patterns where the vselect (mask) type differs from the
// insert type: a bitconvert sits between the vselect and the
// insert_subvector, and the instruction named InstrStr is still used.
// Cast describes the type the mask/select operates on.
640multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
641                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
642                                 PatFrag vinsert_insert,
643                                 SDNodeXForm INSERT_get_vinsert_imm,
644                                 list<Predicate> p> {
645let Predicates = p in {
  // Merge-masked, register source.
646  def : Pat<(Cast.VT
647             (vselect Cast.KRCWM:$mask,
648                      (bitconvert
649                       (vinsert_insert:$ins (To.VT To.RC:$src1),
650                                            (From.VT From.RC:$src2),
651                                            (iPTR imm))),
652                      Cast.RC:$src0)),
653            (!cast<Instruction>(InstrStr#"rrk")
654             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
655             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masked, memory source (folds From.LdFrag load).
656  def : Pat<(Cast.VT
657             (vselect Cast.KRCWM:$mask,
658                      (bitconvert
659                       (vinsert_insert:$ins (To.VT To.RC:$src1),
660                                            (From.VT
661                                             (bitconvert
662                                              (From.LdFrag addr:$src2))),
663                                            (iPTR imm))),
664                      Cast.RC:$src0)),
665            (!cast<Instruction>(InstrStr#"rmk")
666             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
667             (INSERT_get_vinsert_imm To.RC:$ins))>;
668
  // Zero-masked, register source.
669  def : Pat<(Cast.VT
670             (vselect Cast.KRCWM:$mask,
671                      (bitconvert
672                       (vinsert_insert:$ins (To.VT To.RC:$src1),
673                                            (From.VT From.RC:$src2),
674                                            (iPTR imm))),
675                      Cast.ImmAllZerosV)),
676            (!cast<Instruction>(InstrStr#"rrkz")
677             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
678             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, memory source.
679  def : Pat<(Cast.VT
680             (vselect Cast.KRCWM:$mask,
681                      (bitconvert
682                       (vinsert_insert:$ins (To.VT To.RC:$src1),
683                                            (From.VT
684                                             (bitconvert
685                                              (From.LdFrag addr:$src2))),
686                                            (iPTR imm))),
687                      Cast.ImmAllZerosV)),
688            (!cast<Instruction>(InstrStr#"rmkz")
689             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
690             (INSERT_get_vinsert_imm To.RC:$ins))>;
691}
692}
693
// Instantiations of vinsert_for_mask_cast: for each insert instruction,
// cover every element type whose mask granularity differs from the
// instruction's natural type (e.g. selecting a 32x4 insert under a v8i32
// mask).  Grouped by destination width: 256-bit (Z256, VLX), then 512-bit
// 128-bit inserts (Z), then 512-bit 256-bit inserts (x8/x4 Z).
694defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
695                             v8f32x_info, vinsert128_insert,
696                             INSERT_get_vinsert128_imm, [HasVLX]>;
697defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
698                             v4f64x_info, vinsert128_insert,
699                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
700
701defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
702                             v8i32x_info, vinsert128_insert,
703                             INSERT_get_vinsert128_imm, [HasVLX]>;
704defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
705                             v8i32x_info, vinsert128_insert,
706                             INSERT_get_vinsert128_imm, [HasVLX]>;
707defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
708                             v8i32x_info, vinsert128_insert,
709                             INSERT_get_vinsert128_imm, [HasVLX]>;
710defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
711                             v4i64x_info, vinsert128_insert,
712                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
713defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
714                             v4i64x_info, vinsert128_insert,
715                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
716defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
717                             v4i64x_info, vinsert128_insert,
718                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
719
720defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
721                             v16f32_info, vinsert128_insert,
722                             INSERT_get_vinsert128_imm, [HasAVX512]>;
723defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
724                             v8f64_info, vinsert128_insert,
725                             INSERT_get_vinsert128_imm, [HasDQI]>;
726
727defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
728                             v16i32_info, vinsert128_insert,
729                             INSERT_get_vinsert128_imm, [HasAVX512]>;
730defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
731                             v16i32_info, vinsert128_insert,
732                             INSERT_get_vinsert128_imm, [HasAVX512]>;
733defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
734                             v16i32_info, vinsert128_insert,
735                             INSERT_get_vinsert128_imm, [HasAVX512]>;
736defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
737                             v8i64_info, vinsert128_insert,
738                             INSERT_get_vinsert128_imm, [HasDQI]>;
739defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
740                             v8i64_info, vinsert128_insert,
741                             INSERT_get_vinsert128_imm, [HasDQI]>;
742defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
743                             v8i64_info, vinsert128_insert,
744                             INSERT_get_vinsert128_imm, [HasDQI]>;
745
746defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
747                             v16f32_info, vinsert256_insert,
748                             INSERT_get_vinsert256_imm, [HasDQI]>;
749defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
750                             v8f64_info, vinsert256_insert,
751                             INSERT_get_vinsert256_imm, [HasAVX512]>;
752
753defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
754                             v16i32_info, vinsert256_insert,
755                             INSERT_get_vinsert256_imm, [HasDQI]>;
756defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
757                             v16i32_info, vinsert256_insert,
758                             INSERT_get_vinsert256_imm, [HasDQI]>;
759defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
760                             v16i32_info, vinsert256_insert,
761                             INSERT_get_vinsert256_imm, [HasDQI]>;
762defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
763                             v8i64_info, vinsert256_insert,
764                             INSERT_get_vinsert256_imm, [HasAVX512]>;
765defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
766                             v8i64_info, vinsert256_insert,
767                             INSERT_get_vinsert256_imm, [HasAVX512]>;
768defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
769                             v8i64_info, vinsert256_insert,
770                             INSERT_get_vinsert256_imm, [HasAVX512]>;
771
772// vinsertps - insert f32 to XMM
773let ExeDomain = SSEPackedSingle in {
// Register-register form; $src3 is the insertps immediate.
774def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
775      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
776      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
777      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
778      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: folds a scalar f32 load, modeled as a v4f32 via
// scalar_to_vector so it feeds the same X86insertps node.
779def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
780      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
781      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
782      [(set VR128X:$dst, (X86insertps VR128X:$src1,
783                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
784                          imm:$src3))]>,
785      EVEX_4V, EVEX_CD8<32, CD8VT1>,
786      Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
787}
788
789//===----------------------------------------------------------------------===//
790// AVX-512 VECTOR EXTRACT
791//---
792
793// Supports two different pattern operators for mask and unmasked ops. Allows
794// null_frag to be passed for one.
// Defines the register (rr), store (mr) and masked-store (mrk) forms of a
// VEXTRACT instruction extracting a To-sized subvector from a From vector.
795multiclass vextract_for_size_split<int Opcode,
796                                   X86VectorVTInfo From, X86VectorVTInfo To,
797                                   SDPatternOperator vextract_extract,
798                                   SDPatternOperator vextract_for_mask,
799                                   SchedWrite SchedRR, SchedWrite SchedMR> {
800
801  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: maskable, with separate unmasked/masked pattern ops so
    // callers can pass null_frag for the unmasked one.
802    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
803                (ins From.RC:$src1, u8imm:$idx),
804                "vextract" # To.EltTypeName # "x" # To.NumElts,
805                "$idx, $src1", "$src1, $idx",
806                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
807                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
808                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
809
    // Unmasked store form: extract then store the subvector.
810    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
811                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
812                    "vextract" # To.EltTypeName # "x" # To.NumElts #
813                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
814                    [(store (To.VT (vextract_extract:$idx
815                                    (From.VT From.RC:$src1), (iPTR imm))),
816                             addr:$dst)]>, EVEX,
817                    Sched<[SchedMR]>;
818
    // Masked store form: no ISel pattern, assembler/intrinsic use only.
819    let mayStore = 1, hasSideEffects = 0 in
820    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
821                    (ins To.MemOp:$dst, To.KRCWM:$mask,
822                                        From.RC:$src1, u8imm:$idx),
823                     "vextract" # To.EltTypeName # "x" # To.NumElts #
824                          "\t{$idx, $src1, $dst {${mask}}|"
825                          "$dst {${mask}}, $src1, $idx}", []>,
826                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
827  }
828}
829
830// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper over vextract_for_size_split for instructions that
// should be selected for both masked and unmasked extracts.
831multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
832                             X86VectorVTInfo To,
833                             SDPatternOperator vextract_extract,
834                             SchedWrite SchedRR, SchedWrite SchedMR> :
835  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
836
837// Codegen pattern for the alternative types
// Maps a subvector extract with an alternative element type onto an already
// defined instruction named InstrStr (register and store forms).
838multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
839                X86VectorVTInfo To, PatFrag vextract_extract,
840                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
841  let Predicates = p in {
     // Register form: extract into To.RC.
842     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
843               (To.VT (!cast<Instruction>(InstrStr#"rr")
844                          From.RC:$src1,
845                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Store form: extract straight to memory.
846     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
847                              (iPTR imm))), addr:$dst),
848               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
849                (EXTRACT_get_vextract_imm To.RC:$ext))>;
850  }
851}
852
// Instantiate the whole family of VEXTRACT subvector-extract instructions
// for one element-type pair; mirrors vinsert_for_type above.  Invoked once
// for FP (VEXTRACTF) and once for integer (VEXTRACTI).
853multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
854                             ValueType EltVT64, int Opcode256,
855                             SchedWrite SchedRR, SchedWrite SchedMR> {
856  let Predicates = [HasAVX512] in {
    // 128-bit and 256-bit extracts from a 512-bit vector (base AVX-512).
857    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
858                                   X86VectorVTInfo<16, EltVT32, VR512>,
859                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
860                                   vextract128_extract, SchedRR, SchedMR>,
861                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
862    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
863                                   X86VectorVTInfo< 8, EltVT64, VR512>,
864                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
865                                   vextract256_extract, SchedRR, SchedMR>,
866                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
867  }
  // 128-bit extract from a 256-bit vector requires AVX512VL.
868  let Predicates = [HasVLX] in
869    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
870                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
871                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
872                                 vextract128_extract, SchedRR, SchedMR>,
873                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;
874
875  // Even with DQI we'd like to only use these instructions for masking.
  // Hence null_frag as the unmasked pattern in the _split forms below.
876  let Predicates = [HasVLX, HasDQI] in
877    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
878                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
879                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
880                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
881                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
882
883  // Even with DQI we'd like to only use these instructions for masking.
884  let Predicates = [HasDQI] in {
885    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
886                                 X86VectorVTInfo< 8, EltVT64, VR512>,
887                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
888                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
889                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
890    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
891                                 X86VectorVTInfo<16, EltVT32, VR512>,
892                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
893                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
894                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
895  }
896}
897
898// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
899defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
900defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
901
902// extract_subvector codegen patterns with the alternative types.
903// Even with AVX512DQ we'll still use these for unmasked operations.
904defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
905          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
906defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
907          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
908
909defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
910          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
911defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
912          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
913
914defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
915          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
916defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
917          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
918
919// Codegen pattern with the alternative types extract VEC128 from VEC256
920defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
921          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
922defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
923          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
924
925// Codegen pattern with the alternative types extract VEC128 from VEC512
926defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
927                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
928defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
929                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
930// Codegen pattern with the alternative types extract VEC256 from VEC512
931defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
932                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
933defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
934                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
935
936
937// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
938// smaller extract to enable EVEX->VEX.
// Each pattern first takes the lower ymm via EXTRACT_SUBREG, then extracts
// its upper 128 bits with the VEX-encoded VEXTRACT*128rr (index 1).
939let Predicates = [NoVLX] in {
940def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
941          (v2i64 (VEXTRACTI128rr
942                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
943                  (iPTR 1)))>;
944def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
945          (v2f64 (VEXTRACTF128rr
946                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
947                  (iPTR 1)))>;
948def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
949          (v4i32 (VEXTRACTI128rr
950                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
951                  (iPTR 1)))>;
952def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
953          (v4f32 (VEXTRACTF128rr
954                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
955                  (iPTR 1)))>;
956def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
957          (v8i16 (VEXTRACTI128rr
958                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
959                  (iPTR 1)))>;
960def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
961          (v16i8 (VEXTRACTI128rr
962                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
963                  (iPTR 1)))>;
964}
965
966// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
967// smaller extract to enable EVEX->VEX.
// Same trick as the NoVLX block above, but with VLX available the 256-bit
// VEXTRACT*32x4Z256rr forms are used instead of the VEX-only *128rr ones.
968let Predicates = [HasVLX] in {
969def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
970          (v2i64 (VEXTRACTI32x4Z256rr
971                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
972                  (iPTR 1)))>;
973def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
974          (v2f64 (VEXTRACTF32x4Z256rr
975                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
976                  (iPTR 1)))>;
977def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
978          (v4i32 (VEXTRACTI32x4Z256rr
979                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
980                  (iPTR 1)))>;
981def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
982          (v4f32 (VEXTRACTF32x4Z256rr
983                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
984                  (iPTR 1)))>;
985def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
986          (v8i16 (VEXTRACTI32x4Z256rr
987                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
988                  (iPTR 1)))>;
989def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
990          (v16i8 (VEXTRACTI32x4Z256rr
991                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
992                  (iPTR 1)))>;
993}
994
995
996// Additional patterns for handling a bitcast between the vselect and the
997// extract_subvector.
// Cast describes the type the mask/select operates on; only register forms
// (merge-masked rrk and zero-masked rrkz) are provided.
998multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
999                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
1000                                  PatFrag vextract_extract,
1001                                  SDNodeXForm EXTRACT_get_vextract_imm,
1002                                  list<Predicate> p> {
1003let Predicates = p in {
  // Merge-masked extract.
1004  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1005                              (bitconvert
1006                               (To.VT (vextract_extract:$ext
1007                                       (From.VT From.RC:$src), (iPTR imm)))),
1008                              To.RC:$src0)),
1009            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1010                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1011                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1012
  // Zero-masked extract.
1013  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1014                              (bitconvert
1015                               (To.VT (vextract_extract:$ext
1016                                       (From.VT From.RC:$src), (iPTR imm)))),
1017                              Cast.ImmAllZerosV)),
1018            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1019                      Cast.KRCWM:$mask, From.RC:$src,
1020                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1021}
1022}
1023
// Instantiations of vextract_for_mask_cast, mirroring the insert side:
// 256-bit sources (Z256, VLX), then 128-bit extracts from 512-bit (Z),
// then 256-bit extracts from 512-bit (x8/x4 Z).
1024defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1025                              v4f32x_info, vextract128_extract,
1026                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1027defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1028                              v2f64x_info, vextract128_extract,
1029                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1030
1031defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1032                              v4i32x_info, vextract128_extract,
1033                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1034defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1035                              v4i32x_info, vextract128_extract,
1036                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1037defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1038                              v4i32x_info, vextract128_extract,
1039                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1040defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1041                              v2i64x_info, vextract128_extract,
1042                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1043defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1044                              v2i64x_info, vextract128_extract,
1045                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1046defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1047                              v2i64x_info, vextract128_extract,
1048                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1049
1050defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1051                              v4f32x_info, vextract128_extract,
1052                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1053defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1054                              v2f64x_info, vextract128_extract,
1055                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1056
1057defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1058                              v4i32x_info, vextract128_extract,
1059                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1060defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1061                              v4i32x_info, vextract128_extract,
1062                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1063defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1064                              v4i32x_info, vextract128_extract,
1065                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1066defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1067                              v2i64x_info, vextract128_extract,
1068                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1069defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1070                              v2i64x_info, vextract128_extract,
1071                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1072defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1073                              v2i64x_info, vextract128_extract,
1074                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1075
1076defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1077                              v8f32x_info, vextract256_extract,
1078                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1079defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1080                              v4f64x_info, vextract256_extract,
1081                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1082
1083defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1084                              v8i32x_info, vextract256_extract,
1085                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1086defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1087                              v8i32x_info, vextract256_extract,
1088                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1089defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1090                              v8i32x_info, vextract256_extract,
1091                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1092defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1093                              v4i64x_info, vextract256_extract,
1094                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1095defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1096                              v4i64x_info, vextract256_extract,
1097                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1098defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1099                              v4i64x_info, vextract256_extract,
1100                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1101
1102// vextractps - extract 32 bits from XMM
// Register form: the source is viewed as v4i32 (bc_v4i32 of the v4f32
// operand) and the selected element lands in a GR32.
1103def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1104      (ins VR128X:$src1, u8imm:$src2),
1105      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1106      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1107      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1108
// Store form: same extract, written directly to a 32-bit memory location.
1109def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1110      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1111      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1112      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1113                          addr:$dst)]>,
1114      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1115
1116//===---------------------------------------------------------------------===//
1117// AVX-512 BROADCAST
1118//---
1119// broadcast with a scalar argument.
// Selects the register-form broadcast instruction (r/rk/rkz) for an
// X86VBroadcast of a scalar FP register, copying the scalar into the
// vector register class first via COPY_TO_REGCLASS.
1120multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1121                            string Name,
1122                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
1123  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1124            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1125             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Merge-masked broadcast.
1126  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1127                                  (X86VBroadcast SrcInfo.FRC:$src),
1128                                  DestInfo.RC:$src0)),
1129            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1130             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1131             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Zero-masked broadcast.
1132  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1133                                  (X86VBroadcast SrcInfo.FRC:$src),
1134                                  DestInfo.ImmAllZerosV)),
1135            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1136             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1137}
1138
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// Defines the register (r) and memory (m) broadcast instructions plus
// selection patterns for folded scalar loads. MaskInfo describes the type
// the masking (vselect) operates on; DestInfo describes the type the
// broadcast node produces; a bitconvert bridges the two. The instructions
// are named after MaskInfo's suffix, so all !cast lookups below must use
// MaskInfo.ZSuffix.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register-source broadcast. The unmasked pattern may be overridden
  // (e.g. null_frag for the DQ-only 32x2 forms).
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                   T8PD, EVEX, Sched<[SchedRR]>;
  // Memory-source broadcast from a scalar element.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold a scalar load wrapped in scalar_to_vector into the memory form
  // (unmasked, merge-masked, zero-masked).
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  // NOTE: use MaskInfo.ZSuffix here, matching the m/mkz patterns; the
  // instructions above are instantiated under the mask type's suffix.
  // (Previously this pattern inconsistently used DestInfo.ZSuffix, which
  // only worked because current users give Mask/Dest the same width.)
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
1202
// Helper class to force mask and broadcast result to same type.
// Common case of the split multiclass: masking operates directly on the
// broadcast result type, so the bitconverts fold away.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
1210
// FP f64 broadcast: 512-bit form under AVX512F, 256-bit form under VLX.
// Note: no Z128 variant is defined (there is no 128-bit vbroadcastsd form).
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}
1229
// FP f32 broadcast: 512-bit form under AVX512F; 256- and 128-bit forms
// under VLX. Unlike the sd version, a Z128 variant exists.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
// vbroadcastss (opcode 0x18) / vbroadcastsd (0x19). The sd form uses
// VEX_W1X so the VEX W bit is 1 under EVEX and 0 under VEX encoding.
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;
1257
// Integer broadcast from a general-purpose register (GR32/GR64 sources).
// Emits the register form with full masking support via AVX512_maskable.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins SrcRC:$src),
                         "vpbroadcast"##_.Suffix, "$src", "$src",
                         (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                         Sched<[SchedRR]>;
}
1268
// Byte/word broadcast from a GPR. The instruction takes a GR32 source (the
// encoding has no GR8/GR16 register operand), so the instruction itself has
// no patterns; the Pats below adapt GR8/GR16 sources by inserting them into
// an undef GR32 via INSERT_SUBREG.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked broadcast of a GR8/GR16 value.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked variant.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked variant.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1292
// Instantiate the byte/word GPR broadcast at all three vector lengths,
// gating the 256/128-bit forms on VLX in addition to the base predicate.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1306
// Instantiate the dword/qword GPR broadcast at all three vector lengths,
// gating the 256/128-bit forms on VLX in addition to the base predicate.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1320
// GPR-source broadcasts. B/W need BWI; D/Q need only AVX512F. D and Q share
// opcode 0x7C and are distinguished by the VEX W bit (VEX_W on Q).
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1330
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
// Maps a broadcast whose source is a wide (256/512-bit) vector onto the
// xmm-source instruction by extracting the low 128 bits (sub_xmm) first.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
                (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}
1341
// Integer element broadcast from a vector register or memory, at all three
// vector lengths, plus the wide-source extract lowerings above.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
                                  EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}
1362
// Vector/memory-source element broadcasts. B/W need BWI; D/Q only AVX512F.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512>, VEX_W1X;
1371
// Subvector broadcast from memory: load a _Src-sized subvector and repeat
// it across the _Dst vector. Memory-only (there is no register form).
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1381
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns (null_frag) so that we only use the DQ instructions
// when masking is requested; unmasked broadcasts are handled by the
// equivalent non-DQ instructions.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1396
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  // Fold the broadcast of such a zero-extending 64-bit load into the
  // memory-form vpbroadcastq.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}
1402
let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  // Same folding as above for the 128/256-bit VLX forms.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  // Both plain i32 loads and zero-extending i16 loads, truncated to i16,
  // read the same low 16 bits, so both fold into vpbroadcastw from memory.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1424
1425//===----------------------------------------------------------------------===//
1426// AVX-512 BROADCAST SUBVECTORS
1427//
1428
// 512-bit destination subvector broadcasts available in base AVX512F:
// 128-bit (x4) and 256-bit (x4) source subvectors, integer and FP flavors.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1441
let Predicates = [HasAVX512] in {
// 256-bit loads of other element types broadcast to 512 bits. Only the
// 64x4 instruction exists for 256-bit subvectors without DQI (the 32x8
// forms below require HasDQI), so these reuse it via bitcasts.
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// For a register source, synthesize the broadcast as an insert of the same
// 256-bit value into both halves of a 512-bit register.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// 128-bit loads of 64-bit (and narrower) element types broadcast to 512
// bits via the 32x4 instruction (the 64x2 forms require HasDQI).
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1517
let Predicates = [HasVLX] in {
// 256-bit destination subvector broadcasts (x4 forms) under VLX.
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Other 128-bit-load element types reuse the 32x4 instruction via bitcasts
// (the 64x2 forms require HasDQI).
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v8f32 (v8i32 immAllZerosV))),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// For a register source, insert the 128-bit value into both halves of a
// 256-bit register.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
1575
let Predicates = [HasVLX, HasDQI] in {
// DQI-only 256-bit 64x2 subvector broadcasts. Instantiated via the _dq
// multiclass, so only masked forms have patterns; unmasked broadcasts keep
// using the 32x4 instructions above.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1602
let Predicates = [HasDQI] in {
// DQI-only 512-bit 64x2 and 32x8 subvector broadcasts. Masked-only
// patterns (via the _dq multiclass); unmasked broadcasts keep using the
// base AVX512F instructions.
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1652
// DQI 32x2 broadcast (512/256-bit destinations). Uses the split multiclass
// since the mask type (_Dst, 32-bit elements) differs from the broadcast
// node type (_Src, 64-bit elements); null_frag disables unmasked patterns.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}
1666
// Integer 32x2 broadcast: adds the VLX 128-bit form on top of the common
// 512/256-bit forms (only the integer variant has a 128-bit encoding).
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}
1677
// 32x2 (64-bit pair of dwords) broadcasts, AVX512DQ.
defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;
1682
let Predicates = [HasVLX] in {
// Broadcast of element 0 of a wide register: extract the low 128 bits and
// use the xmm-source broadcast instruction.
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}

// 512-bit destinations: same extract-low-xmm trick, base AVX512F.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
1699
1700//===----------------------------------------------------------------------===//
1701// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1702//---
// One mask-to-vector broadcast instruction (X86VBroadcastm): source is a mask
// register, destination a vector register.  Register form only -- there are
// no memory or write-masked variants of these instructions.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                  EVEX, Sched<[WriteShuffle]>;
}
1710
// Instantiate the mask broadcast at all three vector widths.  The 512-bit
// form needs only CDI; the 128/256-bit forms additionally require VLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
1720
// CDI mask-to-vector broadcasts: W2D takes a 16-bit mask into i32 elements,
// B2Q takes an 8-bit mask into i64 elements (VEX_W distinguishes them).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               avx512vl_i64_info, VK8>, VEX_W;
1725
1726//===----------------------------------------------------------------------===//
1727// -- VPERMI2 - 3 source operands form --
// VPERMI2 register and full-vector-load forms.  $src1 is the index operand,
// tied to $dst (the indices are overwritten with the result).  The _cast
// variant of the maskable helper is used because the index type (IdxVT) can
// differ from the result type (e.g. the FP instructions use integer indices).
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Memory form: third source comes from a full-width load.
  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
1748
// VPERMI2 embedded-broadcast form (EVEX_B): the third source is a scalar
// memory operand broadcast to the full vector width.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
               IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
1763
// VPERMI2 at all vector widths, including broadcast forms (d/q/ps/pd).
// 128/256-bit forms require VLX; the 512-bit form carries no extra predicate
// here (the element-size predicates come from the instantiation site).
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}
1783
// VPERMI2 for byte/word element types: gated on an extra predicate (BWI or
// VBMI) and without embedded-broadcast forms, since b/w elements cannot be
// broadcast from memory.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>,  EVEX_V256;
  }
}
1799
// VPERMI2 instantiations.  d/q share opcode 0x76 and ps/pd share 0x77
// (VEX_W selects the 64-bit-element variant); the FP forms use integer
// index VTInfos.  w/b use 0x75 and are gated on BWI/VBMI respectively.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1814
1815// Extra patterns to deal with extra bitcasts due to passthru and index being
1816// different types on the fp versions.
// Matches masked VPERMI2 where the index/pass-through register was bitcast
// from CastVT (e.g. vXi64) into the index type: one pattern each for the
// register (rrk), full-load (rmk) and broadcast-load (rmbk) forms.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Register-register form with merge masking.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 (_.VT _.RC:$src2),
                                         (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                             (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  // Third source loaded from memory.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 _.RC:$src2,
                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                         (_.LdFrag addr:$src3)),
                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  // Third source broadcast from a scalar memory operand.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 _.RC:$src2,
                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                         (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}
1841
1842// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the PS forms need these: their index type (vXi32) differs from the
// common vXi64 bitcast source produced by the ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1846
1847// VPERMT2
// VPERMT2 register and full-load forms.  Unlike VPERMI2, here the tied
// operand $src1 is a data table operand and $src2 holds the indices.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Memory form: third source comes from a full-width load.
  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (bitconvert (_.LdFrag addr:$src3)))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// VPERMT2 embedded-broadcast form (EVEX_B): third source is a scalar memory
// operand broadcast to full width.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
1879
// VPERMT2 at all vector widths including broadcast forms; mirrors
// avx512_perm_i_sizes above (128/256-bit forms require VLX).
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>, EVEX_V256;
  }
}
1899
// VPERMT2 for byte/word element types: extra predicate (BWI or VBMI) and no
// broadcast forms, matching avx512_perm_i_sizes_bw above.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
1914
// VPERMT2 instantiations; opcodes/predicates parallel the VPERMI2 set above
// (d/q = 0x7E, w/b = 0x7D with BWI/VBMI, ps/pd = 0x7F; VEX_W for 64-bit
// and 16-bit element encodings).
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1929
1930//===----------------------------------------------------------------------===//
1931// AVX-512 - BLEND using mask
1932//
1933
// Blend-with-mask instruction definitions (reg-reg and full-load forms, with
// plain / merge-masked {k} / zero-masked {k}{z} variants).  All patterns are
// empty ([]): these instructions are presumably selected through masked-move
// lowering elsewhere rather than by ISel patterns here -- TODO confirm.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  // Full-vector memory-source variants.
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
  }
  }
}
// Embedded-broadcast (EVEX_B) memory variants of the blend instructions:
// plain, merge-masked and zero-masked.  Patterns are empty like the base
// forms above.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, ReadAfterLd]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, ReadAfterLd]>;
  }
}
2002
// Blend instantiation for dword/qword element types: all widths get both the
// regular forms and the embedded-broadcast forms; VLX gates 128/256-bit.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                                 EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                      EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                      EVEX_V128;
  }
}
2018
// Blend instantiation for byte/word element types: BWI-gated and without
// broadcast forms (b/w elements cannot be broadcast from memory).
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                               EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                  EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                  EVEX_V128;
  }
}
2032
// Blend-with-mask instantiations: FP forms at 0x65, integer d/q at 0x64,
// byte/word at 0x66; VEX_W selects the 64-bit / word-element encodings.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
2045
2046//===----------------------------------------------------------------------===//
2047// Compare Instructions
2048//===----------------------------------------------------------------------===//
2049
2050// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2051
// Scalar FP compare into a mask register (VCMPSS/VCMPSD).  Defines:
//  * rr_Int/rm_Int/rrb_Int: intrinsic forms on the full vector register with
//    write-mask support; rrb_Int is the {sae} (suppress-all-exceptions) form.
//  * *_alt: asm-parser-only forms taking an explicit 8-bit immediate
//    condition code instead of the mnemonic-embedded one.
//  * rr/rm: isCodeGenOnly forms on the scalar FR register class, used to
//    select plain scalar compares.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc)>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, ReadAfterLd]>;

  // {sae} form: EVEX.b set on a register-register compare suppresses FP
  // exceptions; lowered through the rounding-aware node with FROUND_NO_EXC.
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC))>,
                     EVEX_4V, EVEX_B, Sched<[sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // Use _.KRC for the destination, consistent with rmi_alt/rrb_alt below.
    // (For the scalar VTInfos this multiclass is instantiated with,
    // NumElts == 1 so _.KRC is VK1.)
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
                        Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in
    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc">,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                        Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
                       EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  // CodeGen-only forms on the scalar register class (FR32X/FR64X), matched
  // directly from the scalar compare node.
  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))]>,
                EVEX_4V, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))]>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, ReadAfterLd]>;
  }
}
2129
// VCMPSS (XS prefix, f32) and VCMPSD (XD prefix + VEX_W, f64) instantiations.
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
2138
// Packed integer compare into a mask register (VPCMPEQ*/VPCMPGT* style):
// reg-reg and full-load forms, each with an additional write-masked variant
// whose pattern ANDs the incoming mask with the compare result.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
             EVEX_4V, Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
             EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  // Write-masked variants: result is (mask & compare).
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
}
2172
// Adds the embedded-broadcast (EVEX_B) memory forms to the packed integer
// compare: second operand is a scalar load broadcast to full vector width,
// plain (rmb) and write-masked (rmbk).
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, ReadAfterLd]>;
}
2197
// Packed integer compare at all vector widths (no broadcast forms) --
// used for byte/word element types; 128/256-bit variants need VLX on top
// of the base predicate.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2213
// Same as avx512_icmp_packed_vl but including the embedded-broadcast forms
// -- used for dword/qword element types.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2229
2230// This fragment treats X86cmpm as commutable to help match loads in both
2231// operands for PCMPEQ.
// Commutable SETCC node: lets PCMPEQ match a load in either operand.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Equality compare (commutable) and signed greater-than (not commutable).
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;
2237
2238// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2239// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
// EQ forms use the commutable fragment (IsCommutable = 1) so a load folds
// into either operand; GT forms are not commutable.  d/q variants use the
// _rmb_vl multiclass to also get embedded-broadcast forms; b/w variants
// (BWI-gated) cannot broadcast and are VEX_WIG.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
2274
// One vector width of the VPCMP{B,W,D,Q}[U] family taking an explicit
// condition code ($cc). Frag matches the compare with operands in natural
// order; CommFrag matches the operand-swapped form, and its OperandTransform
// rewrites the immediate so a load appearing as the *first* compare operand
// can still be folded into the memory form (trailing patterns below). Name is
// the instantiation prefix used to reference generated instructions by name.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag CommFrag, X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  // reg/reg form; marked commutable so the sources may be swapped.
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  // reg/mem form: full-width load folded into the second source.
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  // Write-masked reg/reg form: compare result is ANDed with $mask (EVEX_K).
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                      (_.VT _.RC:$src2),
                                                      cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  // Write-masked reg/mem form.
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // Assembler-only aliases with a raw u8imm condition; no ISel patterns.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
  }

  // Fold a load appearing as the first compare operand: match the commuted
  // fragment and rewrite the immediate via CommFrag.OperandTransform.
  def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Same, write-masked.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2367
// Extends avx512_icmp_cc with broadcast-memory forms (EVEX.b) for element
// sizes that support embedded broadcast (dword/qword).
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag CommFrag, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
  // reg/broadcast-mem form: scalar load broadcast to the full vector.
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2)),
                                       cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  // Write-masked reg/broadcast-mem form.
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag:$cc
                                             (_.VT _.RC:$src1),
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // Assembler-only aliases with a raw u8imm condition; no ISel patterns.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                   "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
  }

  // Fold a broadcast load appearing as the first compare operand via the
  // commuted fragment, rewriting the immediate accordingly.
  def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Same, write-masked.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2431
// Instantiate avx512_icmp_cc for all vector lengths: the 512-bit form is
// gated on prd alone, the 128/256-bit forms additionally require VLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag CommFrag, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
                          VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
                               VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
                               VTInfo.info128, NAME>, EVEX_V128;
  }
}
2446
// As avx512_icmp_cc_vl, but instantiates the broadcast-capable (_rmb)
// variant for each vector length.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag CommFrag, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
                              VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
                                    VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
                                   VTInfo.info128, NAME>, EVEX_V128;
  }
}
2461
// Transform a setcc condition code into the corresponding VPCMP immediate
// encoding (via X86::getVPCMPImmForCond).
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above: additionally converts the immediate
// to the one matching the operand-swapped compare.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;
2475
// setcc with a *signed* integer condition code -> VPCMP; the attached XForm
// converts the CondCode operand to the VPCMP immediate.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// setcc with an *unsigned* integer condition code -> VPCMPU.
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
2501
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Signed compares use opcode 0x3F (b/w) / 0x1F (d/q), unsigned 0x3E / 0x1E.
// Word and qword forms reuse the byte/dword opcodes; VEX_W distinguishes
// them in the encoding.
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

// Dword/qword forms support embedded broadcast (_rmb_vl).
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2530
// One vector width of VCMPPS/VCMPPD (opcode 0xC2) with an explicit FP
// condition code. Covers reg/reg, reg/mem, and reg/broadcast-mem forms,
// assembler-only raw-immediate aliases, and patterns that commute a load
// into the second operand for swap-invariant condition codes.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  // reg/reg form; the trailing 1 presumably marks it commutable -- confirm
  // against the AVX512_maskable_cmp definition.
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
                   "vcmp${cc}"#_.Suffix,
                   "$src2, $src1", "$src1, $src2",
                   (X86cmpm (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                           imm:$cc), 1>,
                   Sched<[sched]>;

  // reg/mem form: full-width load in the second source.
  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "$src2, $src1", "$src1, $src2",
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (bitconvert (_.LdFrag addr:$src2))),
                        imm:$cc)>,
                Sched<[sched.Folded, ReadAfterLd]>;

  // reg/broadcast-mem form (EVEX.b): scalar load broadcast to full width.
  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr,
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                        imm:$cc)>,
                EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
                         Sched<[sched]>, NotMemoryFoldable;

    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                             "vcmp"#_.Suffix,
                             "$cc, $src2, $src1", "$src1, $src2, $cc">,
                             Sched<[sched.Folded, ReadAfterLd]>,
                             NotMemoryFoldable;

      defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, ${src2}"##_.BroadcastStr##", $src1",
                         "$src1, ${src2}"##_.BroadcastStr##", $cc">,
                         EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
                         NotMemoryFoldable;
    }
  }

  // Patterns for selecting with loads in other operand.
  // CommutableCMPCC restricts these to condition codes that are invariant
  // under operand swap, so the immediate is reused unchanged.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}
2616
// Suppress-all-exceptions ({sae}) variant of the FP compare; EVEX.b selects
// SAE on the reg/reg encoding.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (X86cmpmRnd (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    imm:$cc,
                                (i32 FROUND_NO_EXC))>,
                     EVEX_B, Sched<[sched]>;

  // Assembler-only raw-immediate alias.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, {sae}, $src2, $src1",
                         "$src1, $src2, {sae}, $cc">,
                         EVEX_B, Sched<[sched]>, NotMemoryFoldable;
   }
}
2639
// Instantiate the FP compare for all vector lengths; only the 512-bit form
// gets the {sae} variant. 128/256-bit forms require VLX.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
2651
// Packed FP compares; VEX_W distinguishes the double-precision form.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2656
// Patterns to select fp compares with load as first operand.
// CommutableCMPCC limits this to swap-invariant condition codes, so the
// immediate is reused unchanged after commuting the operands.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
2667
2668// ----------------------------------------------------------------
2669// FPClass
// Handle the scalar fpclass instruction forms:
//   mask = fpclass(reg_scalar, imm)
//   mask = fpclass(mem_scalar, imm)
// Scalar VFPCLASSSS/VFPCLASSSD: classify one FP element against the classes
// selected by the immediate, producing a 1-bit mask. Provides reg and mem
// forms, each with a write-masked (EVEX_K) variant that ANDs in $mask.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)))]>,
                      Sched<[sched]>;
      // Write-masked reg form.
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                      (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
    // Memory form: classifies the scalar loaded from $src1.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode _.ScalarIntMemCPat:$src1,
                                  (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, ReadAfterLd]>;
    // Write-masked memory form.
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
                        (OpNode _.ScalarIntMemCPat:$src1,
                            (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
2708
// Handle the vector fpclass instruction forms:
//   mask = fpclass(reg_vec, imm)
//   mask = fpclass(mem_vec, imm)
//   mask = fpclass(broadcast(eltVT), imm)
// Vector VFPCLASSPS/VFPCLASSPD for one vector width. mem and broadcast are
// mnemonic suffix strings appended for the memory and broadcast forms (the
// instantiations below pass "{x}"/"{y}"/"{z}" and "{l}"/"{q}").
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem, string broadcast>{
  let ExeDomain = _.ExeDomain in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))]>,
                      Sched<[sched]>;
  // Write-masked reg form: result ANDed with $mask.
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
  // Full-width memory form.
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  // Write-masked memory form.
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast-memory form (EVEX.b): scalar load broadcast to full width.
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  // Write-masked broadcast-memory form.
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))]>,
                    EVEX_B, EVEX_K,  Sched<[sched.Folded, ReadAfterLd]>;
  }
}
2768
// Instantiate the vector fpclass for all vector lengths, passing the
// per-width mnemonic suffix ("{z}"/"{x}"/"{y}") used to disambiguate the
// memory-operand size in assembly.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     X86SchedWriteWidths sched, Predicate prd,
                                     string broadcast>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, "{z}", broadcast>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
                                      _.info128, "{x}", broadcast>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
                                      _.info256, "{y}", broadcast>, EVEX_V256;
  }
}
2784
// Instantiate the whole fpclass family: packed PS/PD (broadcast suffixes
// "{l}"/"{q}") plus the scalar SS/SD forms. VEX_W marks the 64-bit-element
// variants.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      VecOpNode, sched, prd, "{l}">,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      VecOpNode, sched, prd, "{q}">,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f32x_info, prd>,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f64x_info, prd>,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}
2802
// VFPCLASS: vector opcode 0x66, scalar opcode 0x67; requires DQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, SchedWriteFCmp, HasDQI>,
                                      AVX512AIi8Base, EVEX;
2806
2807//-----------------------------------------------------------------
2808// Mask register copy, including
2809// - copy between mask registers
2810// - load/store mask registers
2811// - copy from GPR to mask register and vice versa
2812//
// KMOV forms that involve memory or another mask register:
//   kk: mask <- mask (pure register move, no pattern)
//   km: mask <- memory load
//   mk: memory <- mask store
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}
2829
// KMOV forms between a mask register and a GPR (kr: mask <- GPR,
// rk: GPR <- mask). No patterns; selection happens elsewhere.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}
2842
// Instantiate the KMOV variants. Width availability follows the ISA:
// KMOVB needs DQI, KMOVW is baseline AVX512F, KMOVD/KMOVQ need BWI.
// Note the GPR side of KMOVB/KMOVD is always GR32 — there is no 8/16-bit
// GPR form; narrower values go through a 32-bit register (patterns below).
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  // KMOVD/KMOVQ use different prefix encodings for the mem and GPR forms,
  // hence the split defms.
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2863
// GR from/to mask register
// Bitconverts between iN GPR values and vNi1 mask registers. 8/16-bit
// values are routed through a 32-bit super-register (INSERT_SUBREG /
// EXTRACT_SUBREG) since the KMOV GPR forms only take GR32/GR64.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of a mask-to-GPR move: KMOVWrk/KMOVBrk already zero the upper bits,
// so select them directly; anyext doesn't care about the upper bits and can
// be a plain cross-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

// KMOVBrk exists only with DQI; without it the anyext fallback below applies.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// 32/64-bit masks match GPR width exactly — plain cross-class copies.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2893
// Load/store kreg
// Sub-byte mask types have no dedicated load/store; widen through VK8 and
// use KMOVB (hence the HasDQI gate).
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  // Loads read a full byte; the extra bits beyond the narrow type are
  // discarded by the cross-class copy.
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
2906
// Without DQI there is no KMOVB; load a v8i1 mask via a zero-extending
// byte load into a GR32 and copy across to the mask class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
2911
let Predicates = [HasAVX512] in {
  // scalar_to_vector of a GPR into any mask class: for a 32-bit source a
  // plain cross-class copy suffices; an 8-bit source is first widened into
  // a 32-bit register via INSERT_SUBREG.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Inserting a single GR8-sourced bit into an all-zeros v16i1: mask the
  // source down to bit 0 with AND before the KMOVW so the upper 15 lanes
  // really are zero.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}
2936
// Mask unary operation
// - KNOT
// One reg-to-reg unary mask instruction for a single mask width, guarded
// by the given CPU predicate.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}
2948
// Instantiate a unary mask op at all four widths with the standard
// suffix/predicate/prefix combinations (B needs DQI, W is baseline,
// D/Q need BWI and set VEX_W).
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}
2961
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte masks are always widened to 16 bits for KNOT; the extra high
// bits flipped by the NOT are dead in the narrow result type.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2974
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
// One reg-reg-reg binary mask instruction at a single width; IsCommutable
// lets the two-address pass swap operands for commutative ops.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched, Predicate prd,
                           bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}
2988
// Instantiate a binary mask op at all four widths. prdW lets callers tighten
// the predicate on the W form (e.g. KADD requires DQI even at 16 bits).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
3002
// Composite PatFrags matched by KANDN/KXNOR. The scalar (i1) forms use
// 'not'; the 'v' variants use 'vnot' so they also match vector-of-i1 types.
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3008
// TODO - do we need a X86SchedWriteWidths::KMASK type?
// Mask logic instructions. KANDN is the only non-commutative one; KADD is
// DQI-only at every width (prdW override).
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3016
// Lower logic ops on narrow (<= 8-bit) mask vectors onto the 16-bit K
// instruction by widening both operands to VK16 and copying the result back
// to the register class that matches the pattern's result type.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // Fix: the result must be copied back to VK2/VK4 to match the v2i1/v4i1
  // result type of each pattern; the previous code copy-pasted VK1 here.
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
3041
// Hook each narrow-mask logic op up to its 16-bit K instruction. First
// argument is the vector form of the node, second the scalar (i1) form.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
3047
// Mask unpacking
// KUNPCK concatenates two half-width masks into one full-width mask. The
// concat_vectors pattern passes $src2 first and $src1 second — the operand
// order of the instruction is reversed relative to concat_vectors' lane
// order (presumably src2 lands in the low half; confirm against the SDM).
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
               (ins KRC:$src1, KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                        (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                        (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
3065
// bw (8->16) is baseline AVX512F; wd (16->32) and dq (32->64) need BWI.
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
3069
// Mask bit testing
// KORTEST/KTEST style instructions: no register result, only EFLAGS.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// All-widths instantiation; prdW lets KTEST require DQI for its W form.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}
3093
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
// KTEST requires DQI even for the 16-bit form (prdW override).
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3097
// Mask shift
// KSHIFTL/KSHIFTR: shift a mask register by an 8-bit immediate.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 !strconcat(OpcodeStr,
                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
                            [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
                 Sched<[sched]>;
}

// W uses opc1 with VEX_W, B uses opc1 without VEX_W; Q/D use opc2.
// NOTE(review): the inner multiclass hard-codes Predicates = [HasAVX512];
// verify the outer HasDQI/HasBWI lets actually take effect here (TableGen
// 'let' nesting resolves in favor of the innermost binding).
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3126
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// (Name keeps the historical "axv512" typo; defms below reference it.)
// The narrow operands are widened into the low lanes of a 512-bit register
// with INSERT_SUBREG over IMPLICIT_DEF (upper lanes undefined — harmless,
// since only the low Narrow.NumElts mask bits are kept by the final copy).
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrr")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;

  // Compare ANDed with a mask -> use the zeroing-masked compare form (Zrrk).
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2)))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrrk")
            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;
}
3149
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Same widening trick as above, but for the immediate-condition-code compares
// (VPCMP/VPCMPU): Frag captures the condition and Frag.OperandTransform maps
// the captured CondCode to the instruction's immediate.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Unmasked form.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked form: fold the AND into the compare's write-mask (Zrrik).
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                                 (Narrow.VT Narrow.RC:$src2),
                                                 cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag.OperandTransform $cc)), Narrow.KRC)>;
}
3173
// Same as above, but for fp types which don't use PatFrags.
// The FP compare node carries its condition as a plain immediate, so it is
// forwarded to the instruction unchanged instead of via OperandTransform.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
// Unmasked form.
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            imm:$cc), Narrow.KRC)>;

// Masked form: fold the AND into the compare's write-mask (Zrrik).
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (OpNode (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2), imm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           imm:$cc), Narrow.KRC)>;
}
3195
// Without VLX, 128/256-bit compares must be executed on the 512-bit
// instructions; these defms wire every 32/64-bit element combination
// through the widening multiclasses above.
let Predicates = [HasAVX512, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  // Immediate-condition integer compares (signed and unsigned).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;

  // FP compares.
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
}
3230
// Same widening lowering for the byte/word compares, which additionally
// require BWI for the 512-bit instructions.
let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  // Immediate-condition byte/word compares (signed and unsigned).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3260
// Mask setting all 0s or 1s
// Pseudo (opcode 0, no encoding) that materializes an all-zeros/all-ones
// mask; expanded later (e.g. to KXOR/KXNOR of a register with itself).
// Rematerializable so the register allocator can recreate it cheaply.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}

// Only W/D/Q widths get pseudos; 8-bit-and-narrower masks reuse KSET*W via
// the COPY_TO_REGCLASS patterns below.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3278
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrow all-zeros/all-ones masks are the low bits of the 16-bit KSET
// result; a cross-class copy narrows the type.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3290
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0 both directions are just register-class changes: extracting the
// low sub-mask keeps the low bits, and inserting into undef leaves the high
// bits undefined, so no instruction is required.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// All (narrow, wide) pairs of mask widths.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3326
3327//===----------------------------------------------------------------------===//
3328// AVX-512 - Aligned and unaligned load and store
3329//
3330
// One vector-load/move opcode at a single width: plain move (rr), load (rm),
// and merge-/zero-masked variants (rrk/rrkz/rmk/rmkz), plus masked_load
// selection patterns. SelectOprr lets MOVSS/MOVSD-style users substitute a
// different select node for the register forms.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  // Plain reg-reg move; EVEX2VEXOverride names the VEX instruction the
  // EVEX->VEX compression pass may rewrite this to.
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked reg-reg move: lanes with a clear mask bit become zero.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  // Unmasked load. NoRMPattern suppresses the ISel pattern for opcodes
  // whose load is matched elsewhere.
  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie $src0 to $dst: unselected lanes keep $src0.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect _.KRCWM:$mask,
                          (_.VT (bitconvert (ld_frag addr:$src1))),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  // Zero-masked load.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                    (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // masked_load with undef passthru: lowering is free to use the zeroing
  // form since the unselected lanes are unspecified.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3397
// Aligned-load variant at all three vector lengths: ZMM under the base
// predicate; YMM/XMM additionally require VLX. The empty EVEX2VEX override
// for Z is correct — 512-bit forms have no VEX equivalent.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned512,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned256,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned128,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}
3416
// Unaligned-load variant at all three vector lengths; mirrors
// avx512_alignedload_vl but uses the plain LdFrag / unaligned masked-load
// and forwards SelectOprr to the register forms.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3436
// Store forms for one vector width: the memory stores (plain and masked)
// plus the reversed-encoding (MRMDestReg) register moves used for
// disassembly and by the load/store folding tables.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  // Reversed-encoding reg-reg forms. No patterns; FoldGenData links each
  // one back to its forward-encoded twin.
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  // Plain store. NoMRPattern = 1 drops the selection pattern (used by
  // instantiations such as VMOVDQA32 below).
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked (merging) store; selected via the Pat below, not an ins pattern.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  // "<mnemonic>.s" assembler aliases select the reversed encodings.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
3491
// Instantiates the unaligned-store forms for all three vector widths.
// 512-bit requires only `prd`; 256/128-bit additionally require HasVLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store_unaligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store_unaligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3509
// Instantiates the aligned-store forms for all three vector widths.
// Mirrors avx512_store_vl but uses alignedstore / masked_store_aligned*.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned512, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned256, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3528
// Full-vector move instructions. Each defm combines the load opcode
// (0x28/0x10/0x6F) and the store opcode (0x29/0x11/0x7F) under one name.

// Aligned packed-FP moves.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned packed-FP moves. null_frag disables the select-based
// register-form patterns.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
                               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves. The 32-bit-element flavor passes NoRMPattern /
// NoMRPattern = 1 so the 64-bit-element flavor owns plain load/store
// selection (see the untyped store patterns further below).
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves; byte/word flavors require BWI.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3592
3593/*
3594// Special instructions to help with spilling when we don't have VLX. We need
3595// to load or store from a ZMM register instead. These are converted in
3596// expandPostRAPseudos.
3597let isReMaterializable = 1, canFoldAsLoad = 1,
3598    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3599def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3600                            "", []>, Sched<[WriteFLoadX]>;
3601def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3602                            "", []>, Sched<[WriteFLoadY]>;
3603def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3604                            "", []>, Sched<[WriteFLoadX]>;
3605def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3606                            "", []>, Sched<[WriteFLoadY]>;
3607}
3608
3609let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3610def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3611                            "", []>, Sched<[WriteFStoreX]>;
3612def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3613                            "", []>, Sched<[WriteFStoreY]>;
3614def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3615                            "", []>, Sched<[WriteFStoreX]>;
3616def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3617                            "", []>, Sched<[WriteFStoreY]>;
3618}
3619*/
3620
// Select vselect(mask, 0, src) as a zero-masked register move with the
// mask inverted by KNOT (the instruction zeroes where the mask is 0,
// which is the opposite of what the node asks for here).
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): the source side binds $mask as VK16 but the output uses
// VK16WM, whereas the v8i64 pattern above uses VK8 on both sides — confirm
// the register-class mismatch is intentional.
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3640
// Lowers a masked select on a narrow vector by widening: insert the
// operand(s) into the corresponding 512-bit register, perform the masked
// move there, and extract the original subvector from the result. Used
// below when the VLX forms of the masked moves are unavailable.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge-masking form: vselect(mask, src1, src0).
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zero-masking form: vselect(mask, src1, 0).
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}
3662
// Patterns for handling masked selects of 128/256-bit vectors when VLX
// isn't available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte/word element types additionally need BWI for the masked moves.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3684
// Plain integer stores are all selected with the 64-bit-element move forms
// regardless of element type; only alignment picks A (aligned) vs U.
// (The 32/16/8-bit-element defms above pass NoMRPattern=1, so these
// explicit patterns provide the selection instead.)
let Predicates = [HasAVX512] in {
  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

let Predicates = [HasVLX] in {
  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3730
// A masked extract of the low subvector of `From` is implemented as an
// EXTRACT_SUBREG followed by a masked register move at the narrow width.
// `Cast` is the type the surrounding vselect/bitconvert operates on (it
// selects the 32- vs 64-bit-element instruction flavor at instantiation).
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Merge-masking form: fall back to $src0 where the mask is 0.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;

  // Zero-masking form: zero where the mask is 0.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
}
3751
3752
let Predicates = [HasVLX] in {
// A masked extract from the first 128-bits of a 256-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

// A masked extract from the first 128-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

// A masked extract from the first 256-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}
3799
// Move Int Doubleword to Packed Double Int
// (also includes the quadword GR64 <-> XMM / FR64X moves)
let ExeDomain = SSEPackedInt in {
// vmovd: GR32 -> low element of an XMM register.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd: i32 memory -> low element of an XMM register.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq: GR64 -> low element of an XMM register.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form kept pattern-less; exists for disassembly.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitconvert forms targeting the scalar FR64X class.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
                         EVEX, VEX_W, Sched<[WriteVecStore]>,
                         EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
3843
// Move Int Doubleword to Single Scalar
// Codegen-only bitconvert forms: GR32 / i32 memory -> FR32X.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;

def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3857
// Move doubleword from xmm register to r/m32
// Extracts element 0 of a v4i32 to a GR32 register or an i32 store.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
3873
// Move quadword from xmm1 register to r/m64
// Extracts element 0 of a v2i64 to a GR64 register or an i64 store.
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// Pattern-less store encoding of 0x7E; kept for disassembly only.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// MOVQ store encoding (0xD6) that carries the selection pattern.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Reversed-encoding reg-reg form of MOVQ, for disassembly and the alias.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

// "vmovq.s" forces the reversed (0xD6) encoding.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3907
// Move Scalar Single to Double Int
// Codegen-only bitconvert forms: FR32X -> GR32 register or i32 store.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3922
// Move Quadword Int to Packed Quadword Int
// Loads an i64 from memory into the low element of an XMM register.
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3939
3940//===----------------------------------------------------------------------===//
3941// AVX-512  MOVSS, MOVSD
3942//===----------------------------------------------------------------------===//
3943
// Scalar move (vmovss/vmovsd): register merge forms (plain, zero-masked,
// merge-masked), scalar load/store forms, and their masked variants.
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Plain reg-reg merge; only selected when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masking: X86selects(mask, move, 0).
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masking: falls back to $src0, which is tied to the destination.
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Scalar load into the FRC class; rematerializable and load-foldable.
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // Masked loads have no patterns; selection is handled elsewhere.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Scalar store from the FRC class.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked scalar store; pattern-less and not memory-foldable.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}
3998
// Instantiate the scalar moves for f32 (VMOVSS) and f64 (VMOVSD).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4004
4005
// Select the merge-/zero-masked reg-reg forms when a scalar FP select is
// inserted into a vector via scalar_to_vector + OpNode (movss/movsd).
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// Merge-masked case: the select's false value ($src2) becomes the tied
// pass-through operand of InstrStr#rrk.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

// Zero-masked case: a select against ZeroFP maps to InstrStr#rrkz.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4030
// Match a masked store that codegen widened to 512 bits (insert_subvector of
// the 128-bit value into undef) and shrink it back to a masked scalar store.
// "Mask" is the dag computing the store mask from a MaskRC register.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
4043
// Same as avx512_store_scalar_lowering, but the mask lives in a
// sub-32-bit GPR class: widen it to i32 with INSERT_SUBREG before copying
// into the VK1WM mask register class.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
4058
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.  Mask512 is the widened mask shape seen on
// AVX512F-only targets; Mask128 is the native shape seen with AVX512VL.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store addr:$dst, Mask512,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

// AVX512VL pattern.
def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
4083
// Match a masked load that codegen widened to 512 bits and shrink it back to
// a masked scalar load.  Two cases: zero pass-through (-> rmkz) and a
// zero-extended register pass-through (X86vzmovl, -> rmk).
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
4107
// Same as avx512_load_scalar_lowering, but the mask lives in a sub-32-bit
// GPR class: widen it to i32 with INSERT_SUBREG before copying into VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
4133
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.  Mask512 is the widened mask shape seen on
// AVX512F-only targets; Mask128 is the native shape seen with AVX512VL.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}
4175
// Instantiate the select->masked-move and masked-store lowerings for
// VMOVSS (f32) and VMOVSD (f64); the mask arrives in a GPR of varying width.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4185
// Masked-store lowerings for the newer clang codegen shape: the i8 mask is
// bitconverted to v8i1, the low lanes extracted, and (for AVX512F) widened
// back to v16i1 via insert_subvector into zeros.
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4210
// Masked-load lowerings, mirroring the store instantiations above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4217
// Masked-load lowerings for the newer clang codegen shape (same mask dags as
// the store instantiations above).
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4242
// Scalar f32 select on a single mask bit -> merge-masked VMOVSS through XMM
// registers: copy operands in, do the masked move, copy element 0 back out.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// Zero variant: selecting against +0.0 maps to the zero-masked form.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4252
// Scalar f64 select on a single mask bit -> merge-masked VMOVSD through XMM
// registers: copy operands in, do the masked move, copy element 0 back out.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

// Zero variant: selecting against +0.0 maps to the zero-masked form.  Use
// the f64-typed zero leaf (fp64imm0) for consistency with the fp32imm0 used
// by the f32 pattern above and with the avx512_move_scalar_lowering
// instantiation; the pattern is already f64-typed, so matching is unchanged.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4262
// Reversed-operand (MRMDestReg, opcode 0x11) encodings of the reg-reg scalar
// moves.  isCodeGenOnly + ForceDisassemble: never selected, but kept so the
// disassembler can represent this encoding; FoldGenData links each to its
// canonical 0x10 counterpart.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}
4315
// "vmovss.s"/"vmovsd.s" assembler aliases force the reversed (_REV, 0x11)
// encodings of the register-register forms.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
4336
// Under OptForSize, lower X86vzmovl (move element 0, zero the rest) with
// VMOVSS/VMOVSD against a zeroed register.  For 256/512-bit types, extract
// the low XMM, do the move, and re-insert with SUBREG_TO_REG (the upper
// bits are known zero).
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // Move low f64/i64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // 512-bit versions of the same patterns.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;

}
4382
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// The blend immediates keep only the low element from $src (one dword for
// f32/i32, one qword for f64/i64) and take zero everywhere else.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
                          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
                          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
                          (i8 0xf))), sub_xmm)>;
}
4408
let Predicates = [HasAVX512] in {

  // VMOVSSZrm zeros the high parts of the register; model the
  // zero-extending scalar load by copying its FR32X result into VR128X.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // Likewise for VMOVSDZrm with f64.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
}
4472
// vmovq xmm, xmm: move the low 64 bits and zero the upper bits (X86vzmovl).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}
4481
// Zero-extending GPR->XMM moves (movd/movq) and zero-extending integer
// loads, plus the 256/512-bit widened forms via SUBREG_TO_REG.
let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
}
4531
4532//===----------------------------------------------------------------------===//
4533// AVX-512 - Non-temporals
4534//===----------------------------------------------------------------------===//
4535
4536def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4537                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4538                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4539                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4540
// 256/128-bit non-temporal aligned loads, available with AVX512VL.
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4554
// Non-temporal store: a single memory-destination (MRMDestMem) instruction
// per vector width. st_frag defaults to alignednontemporalstore;
// AddedComplexity = 400 gives these priority over ordinary store patterns.
4555multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4556                        X86SchedWriteMoveLS Sched,
4557                        PatFrag st_frag = alignednontemporalstore> {
4558  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4559  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4560                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4561                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4562                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4563}
4564
// Vector-length expansion of the non-temporal store: the ZMM form is gated
// on HasAVX512 only, the YMM/XMM forms also require AVX512VL.
4565multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4566                           AVX512VLVectorVTInfo VTInfo,
4567                           X86SchedWriteMoveLSWidths Sched> {
4568  let Predicates = [HasAVX512] in
4569    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4570
4571  let Predicates = [HasAVX512, HasVLX] in {
4572    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4573    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4574  }
4575}
4576
// Non-temporal store instruction definitions: integer (i64 element info),
// packed double (VEX_W set) and packed single flavors.
4577defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4578                                SchedWriteVecMoveLSNT>, PD;
4579defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4580                                SchedWriteFMoveLSNT>, PD, VEX_W;
4581defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4582                                SchedWriteFMoveLSNT>, PS;
4583
// Map the remaining 512-bit element types onto VMOVNTDQ for non-temporal
// stores, and select 512-bit aligned non-temporal loads to VMOVNTDQA
// (which carries no pattern of its own).
4584let Predicates = [HasAVX512], AddedComplexity = 400 in {
4585  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4586            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4587  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4588            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4589  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4590            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4591
4592  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4593            (VMOVNTDQAZrm addr:$src)>;
4594  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4595            (VMOVNTDQAZrm addr:$src)>;
4596  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4597            (VMOVNTDQAZrm addr:$src)>;
4598}
4599
// Same mapping as above for the 256-bit and 128-bit forms, which require
// AVX512VL.
4600let Predicates = [HasVLX], AddedComplexity = 400 in {
4601  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4602            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4603  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4604            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4605  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4606            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4607
4608  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4609            (VMOVNTDQAZ256rm addr:$src)>;
4610  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4611            (VMOVNTDQAZ256rm addr:$src)>;
4612  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4613            (VMOVNTDQAZ256rm addr:$src)>;
4614
4615  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4616            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4617  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4618            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4619  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4620            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4621
4622  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4623            (VMOVNTDQAZ128rm addr:$src)>;
4624  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4625            (VMOVNTDQAZ128rm addr:$src)>;
4626  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4627            (VMOVNTDQAZ128rm addr:$src)>;
4628}
4629
4630//===----------------------------------------------------------------------===//
4631// AVX-512 - Integer arithmetic
4632//===----------------------------------------------------------------------===//
// Masked integer binary op: register-register (rr) and register-memory (rm)
// forms. The rm form folds a full-width vector load, bitconverted to the
// operation's vector type. IsCommutable permits operand commuting for the
// rr form.
4633multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4634                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4635                           bit IsCommutable = 0> {
4636  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4637                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4638                    "$src2, $src1", "$src1, $src2",
4639                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4640                    IsCommutable>, AVX512BIBase, EVEX_4V,
4641                    Sched<[sched]>;
4642
4643  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4644                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4645                  "$src2, $src1", "$src1, $src2",
4646                  (_.VT (OpNode _.RC:$src1,
4647                                (bitconvert (_.LdFrag addr:$src2))))>,
4648                  AVX512BIBase, EVEX_4V,
4649                  Sched<[sched.Folded, ReadAfterLd]>;
4650}
4651
// Extends avx512_binop_rm with the EVEX.B broadcast form (rmb), which
// splats a single scalar memory element across the vector before the op.
4652multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4653                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4654                            bit IsCommutable = 0> :
4655           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4656  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4657                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4658                  "${src2}"##_.BroadcastStr##", $src1",
4659                  "$src1, ${src2}"##_.BroadcastStr,
4660                  (_.VT (OpNode _.RC:$src1,
4661                                (X86VBroadcast
4662                                    (_.ScalarLdFrag addr:$src2))))>,
4663                  AVX512BIBase, EVEX_4V, EVEX_B,
4664                  Sched<[sched.Folded, ReadAfterLd]>;
4665}
4666
// Vector-length expansion of avx512_binop_rm: ZMM form under 'prd' alone,
// YMM/XMM forms also require AVX512VL.
4667multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668                              AVX512VLVectorVTInfo VTInfo,
4669                              X86SchedWriteWidths sched, Predicate prd,
4670                              bit IsCommutable = 0> {
4671  let Predicates = [prd] in
4672    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4673                             IsCommutable>, EVEX_V512;
4674
4675  let Predicates = [prd, HasVLX] in {
4676    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4677                                sched.YMM, IsCommutable>, EVEX_V256;
4678    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4679                                sched.XMM, IsCommutable>, EVEX_V128;
4680  }
4681}
4682
// Same as avx512_binop_rm_vl but for ops that also provide the broadcast
// (rmb) form.
4683multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4684                               AVX512VLVectorVTInfo VTInfo,
4685                               X86SchedWriteWidths sched, Predicate prd,
4686                               bit IsCommutable = 0> {
4687  let Predicates = [prd] in
4688    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4689                             IsCommutable>, EVEX_V512;
4690
4691  let Predicates = [prd, HasVLX] in {
4692    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4693                                 sched.YMM, IsCommutable>, EVEX_V256;
4694    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4695                                 sched.XMM, IsCommutable>, EVEX_V128;
4696  }
4697}
4698
// qword-element flavor: has the broadcast form, sets VEX_W, CD8 scale 64.
4699multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4700                                X86SchedWriteWidths sched, Predicate prd,
4701                                bit IsCommutable = 0> {
4702  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4703                                  sched, prd, IsCommutable>,
4704                                  VEX_W, EVEX_CD8<64, CD8VF>;
4705}
4706
// dword-element flavor: has the broadcast form, CD8 scale 32.
4707multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4708                                X86SchedWriteWidths sched, Predicate prd,
4709                                bit IsCommutable = 0> {
4710  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4711                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4712}
4713
// word-element flavor: no broadcast form is defined (uses avx512_binop_rm_vl);
// the W bit is ignored (VEX_WIG).
4714multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4715                                X86SchedWriteWidths sched, Predicate prd,
4716                                bit IsCommutable = 0> {
4717  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4718                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4719                                 VEX_WIG;
4720}
4721
// byte-element flavor: no broadcast form is defined; W bit ignored.
4722multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4723                                X86SchedWriteWidths sched, Predicate prd,
4724                                bit IsCommutable = 0> {
4725  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4726                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4727                                 VEX_WIG;
4728}
4729
// Expands a dword/qword opcode pair; the "d"/"q" mnemonic suffix is
// appended here.
4730multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4731                                 SDNode OpNode, X86SchedWriteWidths sched,
4732                                 Predicate prd, bit IsCommutable = 0> {
4733  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4734                                   IsCommutable>;
4735
4736  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4737                                   IsCommutable>;
4738}
4739
// Expands a byte/word opcode pair; the "b"/"w" mnemonic suffix is
// appended here.
4740multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4741                                 SDNode OpNode, X86SchedWriteWidths sched,
4742                                 Predicate prd, bit IsCommutable = 0> {
4743  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4744                                   IsCommutable>;
4745
4746  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4747                                   IsCommutable>;
4748}
4749
// All four element sizes at once: the d/q variants are gated on HasAVX512,
// the b/w variants on HasBWI.
4750multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4751                                  bits<8> opc_d, bits<8> opc_q,
4752                                  string OpcodeStr, SDNode OpNode,
4753                                  X86SchedWriteWidths sched,
4754                                  bit IsCommutable = 0> {
4755  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4756                                    sched, HasAVX512, IsCommutable>,
4757              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4758                                    sched, HasBWI, IsCommutable>;
4759}
4760
// Binary op whose source (_Src), destination (_Dst) and broadcast (_Brdct)
// vector types may all differ. The rmb form broadcasts a _Brdct scalar
// element and bitconverts the result back to the source vector type before
// applying OpNode.
4761multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4762                            X86FoldableSchedWrite sched,
4763                            SDNode OpNode,X86VectorVTInfo _Src,
4764                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4765                            bit IsCommutable = 0> {
4766  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4767                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4768                            "$src2, $src1","$src1, $src2",
4769                            (_Dst.VT (OpNode
4770                                         (_Src.VT _Src.RC:$src1),
4771                                         (_Src.VT _Src.RC:$src2))),
4772                            IsCommutable>,
4773                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
4774  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4775                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4776                        "$src2, $src1", "$src1, $src2",
4777                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4778                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
4779                        AVX512BIBase, EVEX_4V,
4780                        Sched<[sched.Folded, ReadAfterLd]>;
4781
4782  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4783                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4784                    OpcodeStr,
4785                    "${src2}"##_Brdct.BroadcastStr##", $src1",
4786                     "$src1, ${src2}"##_Brdct.BroadcastStr,
4787                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4788                                 (_Brdct.VT (X86VBroadcast
4789                                          (_Brdct.ScalarLdFrag addr:$src2))))))>,
4790                    AVX512BIBase, EVEX_4V, EVEX_B,
4791                    Sched<[sched.Folded, ReadAfterLd]>;
4792}
4793
// Integer add/sub (plain, saturating, unsigned-saturating), multiply,
// high-multiply and average instruction definitions. The trailing 1/0
// argument marks commutable operations.
4794defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4795                                    SchedWriteVecALU, 1>;
4796defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4797                                    SchedWriteVecALU, 0>;
4798defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4799                                    SchedWriteVecALU, HasBWI, 1>;
4800defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4801                                    SchedWriteVecALU, HasBWI, 0>;
4802defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4803                                     SchedWriteVecALU, HasBWI, 1>;
4804defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4805                                     SchedWriteVecALU, HasBWI, 0>;
4806defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4807                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
4808defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4809                                    SchedWriteVecIMul, HasBWI, 1>;
// VPMULLQ requires AVX512DQ; it has no VEX equivalent, so EVEX->VEX
// compression is explicitly disabled.
4810defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4811                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
4812                                    NotEVEX2VEXConvertible;
4813defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4814                                    HasBWI, 1>;
4815defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4816                                     HasBWI, 1>;
4817defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4818                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
4819defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4820                                   SchedWriteVecALU, HasBWI, 1>;
4821defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4822                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4823defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4824                                     SchedWriteVecIMul, HasAVX512, 1>;
4825
// Vector-length expansion of avx512_binop_rm2. The broadcast type is always
// the qword vector info (v8i64/v4i64/v2i64) and all forms set VEX_W with
// CD8 scale 64.
4826multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4827                            X86SchedWriteWidths sched,
4828                            AVX512VLVectorVTInfo _SrcVTInfo,
4829                            AVX512VLVectorVTInfo _DstVTInfo,
4830                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4831  let Predicates = [prd] in
4832    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4833                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4834                                 v8i64_info, IsCommutable>,
4835                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4836  let Predicates = [HasVLX, prd] in {
4837    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4838                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4839                                      v4i64x_info, IsCommutable>,
4840                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4841    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4842                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4843                                      v2i64x_info, IsCommutable>,
4844                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4845  }
4846}
4847
// VPMULTISHIFTQB (AVX512_VBMI): byte source and destination vectors with
// qword broadcast granularity.
4848defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4849                                avx512vl_i8_info, avx512vl_i8_info,
4850                                X86multishift, HasVBMI, 0>, T8PD;
4851
// Broadcast (EVEX.B) form for pack-style ops whose source and destination
// vector types differ; the broadcast uses the source element size.
4852multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4853                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4854                            X86FoldableSchedWrite sched> {
4855  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4856                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4857                    OpcodeStr,
4858                    "${src2}"##_Src.BroadcastStr##", $src1",
4859                     "$src1, ${src2}"##_Src.BroadcastStr,
4860                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4861                                 (_Src.VT (X86VBroadcast
4862                                          (_Src.ScalarLdFrag addr:$src2))))))>,
4863                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4864                    Sched<[sched.Folded, ReadAfterLd]>;
4865}
4866
// rr/rm forms for pack-style ops where the source and destination vector
// types differ (e.g. i32 in, i16 out).
4867multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4868                            SDNode OpNode,X86VectorVTInfo _Src,
4869                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4870                            bit IsCommutable = 0> {
4871  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4872                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4873                            "$src2, $src1","$src1, $src2",
4874                            (_Dst.VT (OpNode
4875                                         (_Src.VT _Src.RC:$src1),
4876                                         (_Src.VT _Src.RC:$src2))),
4877                            IsCommutable>,
4878                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4879  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4880                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4881                        "$src2, $src1", "$src1, $src2",
4882                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4883                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
4884                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4885                         Sched<[sched.Folded, ReadAfterLd]>;
4886}
4887
// PACK*DW-style expansion: i32 source, i16 destination, including the
// broadcast form. 512-bit requires BWI; 256/128-bit additionally need VLX.
4888multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4889                                    SDNode OpNode> {
4890  let Predicates = [HasBWI] in
4891  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4892                                 v32i16_info, SchedWriteShuffle.ZMM>,
4893                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4894                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4895  let Predicates = [HasBWI, HasVLX] in {
4896    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4897                                     v16i16x_info, SchedWriteShuffle.YMM>,
4898                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4899                                      v16i16x_info, SchedWriteShuffle.YMM>,
4900                                      EVEX_V256;
4901    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4902                                     v8i16x_info, SchedWriteShuffle.XMM>,
4903                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4904                                      v8i16x_info, SchedWriteShuffle.XMM>,
4905                                      EVEX_V128;
4906  }
4907}
// PACK*WB-style expansion: i16 source, i8 destination. No broadcast form
// is defined for these.
4908multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4909                            SDNode OpNode> {
4910  let Predicates = [HasBWI] in
4911  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4912                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4913  let Predicates = [HasBWI, HasVLX] in {
4914    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4915                                     v32i8x_info, SchedWriteShuffle.YMM>,
4916                                     EVEX_V256, VEX_WIG;
4917    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4918                                     v16i8x_info, SchedWriteShuffle.XMM>,
4919                                     EVEX_V128, VEX_WIG;
4920  }
4921}
4922
// VPMADD* expansion: source and destination element sizes differ (the info
// pair is passed in). All forms are gated on BWI, plus VLX for 256/128-bit.
4923multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4924                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4925                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4926  let Predicates = [HasBWI] in
4927  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4928                                _Dst.info512, SchedWriteVecIMul.ZMM,
4929                                IsCommutable>, EVEX_V512;
4930  let Predicates = [HasBWI, HasVLX] in {
4931    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4932                                     _Dst.info256, SchedWriteVecIMul.YMM,
4933                                     IsCommutable>, EVEX_V256;
4934    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4935                                     _Dst.info128, SchedWriteVecIMul.XMM,
4936                                     IsCommutable>, EVEX_V128;
4937  }
4938}
4939
// Pack and multiply-add instruction definitions. VPACKUSDW uses the 0F38
// opcode map (AVX5128IBase); the others use the 0F map (AVX512BIBase).
4940defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4941defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4942defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4943defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4944
4945defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4946                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4947defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4948                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4949
// Integer min/max instruction definitions (signed/unsigned, all element
// sizes). The qword forms have no VEX equivalent, hence
// NotEVEX2VEXConvertible.
4950defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4951                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4952defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4953                                    SchedWriteVecALU, HasBWI, 1>;
4954defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4955                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4956defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4957                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4958                                    NotEVEX2VEXConvertible;
4959
4960defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4961                                    SchedWriteVecALU, HasBWI, 1>;
4962defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4963                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4964defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4965                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4966defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4967                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4968                                    NotEVEX2VEXConvertible;
4969
4970defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4971                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4972defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4973                                    SchedWriteVecALU, HasBWI, 1>;
4974defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4975                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4976defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4977                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4978                                    NotEVEX2VEXConvertible;
4979
4980defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4981                                    SchedWriteVecALU, HasBWI, 1>;
4982defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4983                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4984defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4985                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4986defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4987                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4988                                    NotEVEX2VEXConvertible;
4989
4990// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen both operands into a ZMM register via INSERT_SUBREG over an
// IMPLICIT_DEF, run the 512-bit multiply, then extract the low subregister.
4991let Predicates = [HasDQI, NoVLX] in {
4992  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4993            (EXTRACT_SUBREG
4994                (VPMULLQZrr
4995                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4996                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4997             sub_ymm)>;
4998
4999  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5000            (EXTRACT_SUBREG
5001                (VPMULLQZrr
5002                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5003                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5004             sub_xmm)>;
5005}
5006
// NOTE: a second, byte-for-byte identical copy of the PMULLQ NoVLX lowering
// patterns (v4i64/v2i64 mul widened through VPMULLQZrr) used to live here.
// It duplicated the block immediately above, emitting the same anonymous
// patterns twice, and has been removed.
5023
// Lowers 256/128-bit v*i64 min/max nodes to the given 512-bit instruction
// when VLX is unavailable: widen via INSERT_SUBREG over an IMPLICIT_DEF,
// apply the ZMM instruction, then extract the original subregister.
5024multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
5025  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5026            (EXTRACT_SUBREG
5027                (Instr
5028                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5029                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5030             sub_ymm)>;
5031
5032  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5033            (EXTRACT_SUBREG
5034                (Instr
5035                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5036                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5037             sub_xmm)>;
5038}
5039
// Instantiate the widening fallback for the four qword min/max nodes when
// only 512-bit instructions are available (no VLX).
5040let Predicates = [HasAVX512, NoVLX] in {
5041  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
5042  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
5043  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
5044  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
5045}
5046
5047//===----------------------------------------------------------------------===//
5048// AVX-512  Logical Instructions
5049//===----------------------------------------------------------------------===//
5050
5051// OpNodeMsk is the OpNode to use when element size is important. OpNode will
5052// be set to null_frag for 32-bit elements.
// rr/rm forms of a bitwise logic op. The unmasked pattern is written in the
// i64 vector type via bitconverts (logic is element-size agnostic); OpNode
// may be null_frag (see comment above) so that only the masked OpNodeMsk
// pattern matches.
5053multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
5054                           SDPatternOperator OpNode,
5055                           SDNode OpNodeMsk, X86FoldableSchedWrite sched,
5056                           X86VectorVTInfo _, bit IsCommutable = 0> {
5057  let hasSideEffects = 0 in
5058  defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
5059                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5060                    "$src2, $src1", "$src1, $src2",
5061                    (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
5062                                     (bitconvert (_.VT _.RC:$src2)))),
5063                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5064                                                          _.RC:$src2)))),
5065                    IsCommutable>, AVX512BIBase, EVEX_4V,
5066                    Sched<[sched]>;
5067
5068  let hasSideEffects = 0, mayLoad = 1 in
5069  defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
5070                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5071                  "$src2, $src1", "$src1, $src2",
5072                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
5073                                   (bitconvert (_.LdFrag addr:$src2)))),
5074                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5075                                     (bitconvert (_.LdFrag addr:$src2))))))>,
5076                  AVX512BIBase, EVEX_4V,
5077                  Sched<[sched.Folded, ReadAfterLd]>;
5078}
5079
5080// OpNodeMsk is the OpNode to use where element size is important. So use
5081// for all of the broadcast patterns.
// Broadcast (EVEX.B) form on top of avx512_logic_rm. Both the unmasked and
// masked patterns use OpNodeMsk here, since the broadcast makes the element
// size significant (see comment above).
5082multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
5083                            SDPatternOperator OpNode,
5084                            SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
5085                            bit IsCommutable = 0> :
5086           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
5087                           IsCommutable> {
5088  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
5089                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5090                  "${src2}"##_.BroadcastStr##", $src1",
5091                  "$src1, ${src2}"##_.BroadcastStr,
5092                  (_.i64VT (OpNodeMsk _.RC:$src1,
5093                                   (bitconvert
5094                                    (_.VT (X86VBroadcast
5095                                            (_.ScalarLdFrag addr:$src2)))))),
5096                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5097                                     (bitconvert
5098                                      (_.VT (X86VBroadcast
5099                                             (_.ScalarLdFrag addr:$src2))))))))>,
5100                  AVX512BIBase, EVEX_4V, EVEX_B,
5101                  Sched<[sched.Folded, ReadAfterLd]>;
5102}
5103
// Vector-length expansion of avx512_logic_rmb: ZMM under HasAVX512,
// YMM/XMM additionally under VLX.
5104multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
5105                               SDPatternOperator OpNode,
5106                               SDNode OpNodeMsk, X86SchedWriteWidths sched,
5107                               AVX512VLVectorVTInfo VTInfo,
5108                               bit IsCommutable = 0> {
5109  let Predicates = [HasAVX512] in
5110    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
5111                              VTInfo.info512, IsCommutable>, EVEX_V512;
5112
5113  let Predicates = [HasAVX512, HasVLX] in {
5114    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
5115                                 VTInfo.info256, IsCommutable>, EVEX_V256;
5116    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
5117                                 VTInfo.info128, IsCommutable>, EVEX_V128;
5118  }
5119}
5120
// Creates both the dword ('d' suffix, i32 elements) and qword ('q' suffix,
// i64 elements) flavors of a logic op. The D variant gets null_frag for the
// non-broadcast OpNode: selection happens via the element-size-agnostic
// OpNodeMsk/q patterns, keeping only the masked/broadcast D patterns.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
                               avx512vl_i64_info, IsCommutable>,
                               VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
                               avx512vl_i32_info, IsCommutable>,
                               EVEX_CD8<32, CD8VF>;
}
5131
// Integer bitwise logic: each expands to d/q element-size variants at all
// vector lengths. The d and q forms of each op share the same opcode byte.
// VPANDN is not commutable (andnot complements only its first operand).
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic>;
5140
5141//===----------------------------------------------------------------------===//
5142// AVX-512  FP arithmetic
5143//===----------------------------------------------------------------------===//
5144
// Scalar FP binop: masked intrinsic forms (rr_Int/rm_Int, whole-XMM operands
// using VecNode with the current rounding mode) plus codegen-only FR32X/FR64X
// register forms (rr/rm) that match the plain scalar OpNode.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                          (i32 FROUND_CURRENT)))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2,
                                        (i32 FROUND_CURRENT)))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  // Codegen-only scalar-register forms; no masking, used for plain IR fadd
  // etc. on scalar values.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
  }
}
5180
// Scalar FP binop with an explicit static rounding-mode operand ($rc),
// encoded via EVEX.B + EVEX.RC (register form only).
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$rc)), IsCommutable>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP binop whose broadcast/rounding slot only supports {sae}
// (suppress-all-exceptions), e.g. min/max: intrinsic rr_Int/rm_Int forms,
// codegen-only FRC forms, and an EVEX.B {sae} form (rrb_Int) using SaeNode
// with FROUND_NO_EXC.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Codegen-only scalar-register forms matching the plain scalar OpNode.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }

  // {sae} form: suppresses FP exceptions, no rounding-mode choice.
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;
  }
}
5233
// SS (f32, XS prefix) and SD (f64, XD + VEX.W) scalar variants of a binop
// that supports embedded rounding control.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, X86SchedWriteSizes sched,
                                bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
5248
// SS/SD scalar variants of a binop that supports only {sae} (no rounding
// control), e.g. min/max.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic. add/mul are commutable; sub/div are not. min/max use
// the {sae}-only form since they have no rounding-mode variant.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SchedWriteFCmpSizes, 0>;
5271
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// (NOTE: the multiclass name spells "comutable" with one 'm'; kept as-is
// since renaming would break the defm instantiations below.)
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  // Codegen-only FRC register/memory forms; rr is always commutable here.
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// Commutable-min/max instantiations: same opcodes and mnemonics as
// VMIN/VMAX, but matching the X86fminc/X86fmaxc nodes.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;
5308
// Packed FP binop: masked rr, rm, and embedded-broadcast rmb forms.
// IsKZCommutable lets the zero-masked form be commutable independently of
// the merge-masked form (defaults to IsCommutable).
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKZCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
                  IsKZCommutable>,
                  EVEX_4V, Sched<[sched]>;
  // mayLoad must be stated explicitly because hasSideEffects = 0 above
  // suppresses pattern-based inference.
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, ReadAfterLd]>;
    }
  }
}
5337
// Packed FP binop with an explicit static rounding-mode operand ($rc),
// encoded via EVEX.B + EVEX.RC (register form only).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
5348
// Packed FP binop {sae} form: EVEX.B with no rounding-mode operand; the
// pattern fixes the rounding argument to FROUND_NO_EXC.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
5359
// Packed FP binop at all vector lengths and both element sizes (PS/PD).
// IsPD128Commutable allows the 128-bit PD form a different merge-mask
// commutability while passing IsCommutable through as its IsKZCommutable.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

    // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
5390
// Rounding-control forms exist only at 512-bit width (ZMM).
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5400
// {sae} forms exist only at 512-bit width (ZMM).
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5410
// Packed FP arithmetic. add/sub/mul/div get rounding-control variants;
// min/max get {sae} variants instead.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
// Commutable min/max variants (X86fminc/X86fmaxc); codegen-only, sharing
// opcodes/mnemonics with VMIN/VMAX above.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
// FP bitwise logic; null_frag because selection is handled via the lowering
// patterns below. Gated on DQI, which provides these instructions.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
5443
// Patterns that catch floating-point selects over bitcasted integer logic
// ops, lowering them to the masked forms of the integer logic instructions.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;
  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                         (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
  // Register-broadcast logical operations.
  def : Pat<(_.i64VT (OpNode _.RC:$src1,
                      (bitconvert (_.VT (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  // Masked register-broadcast logical operations (merge and zero masking).
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
}
5496
// Instantiate the lowering patterns for both FP element sizes (D=f32, Q=f64)
// at 128/256-bit (VLX) and 512-bit widths.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
5505
// FP logic-op lowering is done through the integer VPAND/VPOR/VPXOR/VPANDN
// instructions defined earlier in this file.
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5510
let Predicates = [HasVLX,HasDQI] in {
  // Use packed logical operations for scalar ops: copy the FR32X/FR64X
  // operands into VR128X, run the 128-bit packed op, and copy the result
  // back to the scalar register class.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                   (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;

  // Same lowering for f32 via the PS forms.
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                   (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
}
5555
// Packed scalef-style op: rr/rm/rmb forms whose patterns carry an implicit
// current-rounding-mode operand (FROUND_CURRENT).
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                              (i32 FROUND_CURRENT))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
5579
// Scalar scalef-style op: rr/rm intrinsic forms with the implicit
// current-rounding-mode operand (FROUND_CURRENT).
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
                          (i32 FROUND_CURRENT))>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  }
}
5596
// All scalef variants: packed PS/PD at every width (rounding form only at
// 512-bit) plus SS/SD scalar forms with their own opcode (opcScaler).
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                SDNode OpNode, SDNode OpNodeScal,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
                              EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
                           EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
                           EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D; EVEX-only encoding.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5627
5628//===----------------------------------------------------------------------===//
5629// AVX-512  VPTESTM instructions
5630//===----------------------------------------------------------------------===//
5631
// VPTESTM-style compare: ANDs the two sources (on the i64 vector type) and
// compares against zero, producing a mask register result. rr and rm forms,
// plus patterns that reuse rr for a compare-with-zero of a single source.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  let ExeDomain = _.ExeDomain in {
  let isCommutable = 1 in
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1,
                                          (bitconvert (_.LdFrag addr:$src2))))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
                                      _.RC:$src, _.RC:$src))>;

  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
                                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
5663
// Broadcast-memory form of the vptest test: the scalar at $src2 is broadcast
// (${src2}{1toN}) before being ANDed with src1 and compared against zero.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (OpNode (and _.RC:$src1,
                                       (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2))),
                            _.ImmAllZerosV)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
5678
5679// Use 512bit version to implement 128/256 bit in case NoVLX.
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern widens the narrow sources into a 512-bit register via
// INSERT_SUBREG into IMPLICIT_DEF, executes the Z-suffixed (512-bit)
// instruction, and copies the resulting mask back to the narrow mask class.
// ExtendInfo is the 512-bit VT info; _ is the narrow (128/256-bit) VT info.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Name> {
  // Unmasked two-source test.
  def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src1, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src2, _.SubRegIdx)),
                   _.KRC))>;

  // Masked two-source test: the narrow mask is widened to ExtendInfo.KRC
  // before feeding the 512-bit masked instruction.
  def : Pat<(_.KVT (and _.KRC:$mask,
                        (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                                _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(Name # "Zrrk")
              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src1, _.SubRegIdx),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src2, _.SubRegIdx)),
             _.KRC)>;

  // Unmasked compare-with-0: same source used for both operands.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;

  // Masked compare-with-0.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(Name # "Zrrk")
              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src, _.SubRegIdx),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src, _.SubRegIdx)),
             _.KRC)>;
}
5723
// Instantiates the vptest reg/mem/broadcast forms for one element type
// (d or q) at all three vector widths. The 128/256-bit native forms need
// VLX; without it the _Alt lowering patterns fall back to the 512-bit form.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
  let Predicates = [HasAVX512, NoVLX] in {
  defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
  defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
  }
}
5741
// Doubleword and quadword forms; the q form gets VEX_W (64-bit element size).
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}
5749
// Word and byte forms (BWI required); no broadcast variants exist for
// byte/word elements. Narrow widths need VLX; otherwise the _Alt lowering
// patterns widen to the 512-bit form.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            PatFrag OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
                            v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }

  // NOTE(review): predicate is [HasAVX512, NoVLX] rather than [HasBWI, NoVLX];
  // presumably the 512-bit fallback instructions still require BWI to select.
  let Predicates = [HasAVX512, NoVLX] in {
  defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
  defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
  defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
  defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
  }
}
5777
5778// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
5779// as commutable here because we already canonicalized all zeros vectors to the
5780// RHS during lowering.
// Equal-to-zero compare: selects vptestnm.
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETEQ)>;
// Not-equal-to-zero compare: selects vptestm.
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETNE)>;
5785
// Combines the byte/word and dword/qword variants under one name; opc_wb is
// the opcode for the b/w forms, opc_dq for the d/q forms.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   PatFrag OpNode, X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;

// vptestm sets the mask bit when (src1 & src2) != 0 (T8PD encoding);
// vptestnm sets it when (src1 & src2) == 0 (T8XS encoding).
defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                         SchedWriteVecLogic>, T8PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                         SchedWriteVecLogic>, T8XS;
5795
5796//===----------------------------------------------------------------------===//
5797// AVX-512  Shift instructions
5798//===----------------------------------------------------------------------===//
5799
// Shift-by-immediate: reg/imm (ri) and mem/imm (mi) forms. ImmFormR/ImmFormM
// carry the ModRM /reg encoding (e.g. MRM2r/MRM2m for psrl).
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                          (i8 imm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}
5817
// Shift-by-immediate with a broadcast memory source (${src1}{1toN}).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
     EVEX_B, Sched<[sched.Folded]>;
}
5828
// Shift by a count held in an xmm register / 128-bit memory operand.
// SrcVT is the 128-bit count type (v4i32/v2i64/v8i16).
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            PatFrag bc_frag, X86VectorVTInfo _> {
   // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  // Memory form: count is loaded as v2i64 and bitcast to SrcVT via bc_frag.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
5847
// Instantiates the xmm-count shift at all three widths. Note the CD8 tuple
// differs per width (CD8VQ / CD8VH / CD8VF) because the memory operand is
// always 128 bits regardless of the destination width.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               bc_frag, VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               bc_frag, VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               bc_frag, VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5865
// d/q/w element forms of the xmm-count shift. NotEVEX2VEXConvertibleQ is set
// for vpsraq, which has no VEX equivalent (see the VPSRA instantiation below).
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              bc_v4i32, avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              bc_v2i64, avx512vl_i16_info, HasBWI>;
}
5878
// Immediate shift (reg/mem/broadcast forms) at all three vector widths.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}
5899
// Word-element immediate shift (BWI); no broadcast form exists for words.
// VEX_WIG: the W bit is ignored by hardware for these encodings.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}
5913
// Dword/qword immediate shifts; NotEVEX2VEXConvertibleQ marks the q form as
// not compressible to VEX (used by vpsraq below, which has no VEX encoding).
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
5925
// Immediate-count shifts and rotates. The ModRM /reg field (MRM2/MRM4/MRM6,
// MRM0/MRM1) distinguishes the operations sharing opcodes 0x71/0x72/0x73.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// The trailing '1' marks vpsraq as not EVEX-to-VEX convertible (new in AVX512).
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// Shifts by an xmm-register count.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
5952
5953// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  // v4i64 arithmetic shift by xmm count: widen src1 to zmm, shift, take ymm.
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  // v2i64 arithmetic shift by xmm count via the 512-bit instruction.
  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  // Immediate-count variants of the same two widenings.
  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
}
5979
5980//===-------------------------------------------------------------------===//
5981// Variable Bit Shifts
5982//===-------------------------------------------------------------------===//
5983
// Per-element variable shift: reg/reg and reg/mem forms where each element of
// src2 supplies the shift count for the corresponding element of src1. Also
// reused by the VPERM multiclasses further below, which share this shape.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6001
// Broadcast-memory form of the variable shift (${src2}{1toN}).
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2)))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
6014
// Variable shift (reg/mem/broadcast) at all three vector widths; narrow
// widths additionally require VLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}
6028
// Dword and qword variable-shift forms; the q form gets VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}
6036
6037// Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens 256- and 128-bit variable-shift nodes to the 512-bit instruction
// (Zrr) by inserting the sources into IMPLICIT_DEF zmm values and extracting
// the low subregister afterwards. Used when VLX is unavailable.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element variable shift (BWI); no broadcast form exists for words.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}
6071
// Variable-count shifts (d/q forms plus the BWI word forms) and variable
// rotates (d/q only).
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

// NoVLX widening fallbacks for the ops whose narrow forms have no VEX
// encoding (vpsravq, and all the word-element variable shifts).
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
6088
// Special handling for VPSRAV intrinsics.
// Selects the X86vsrav node (produced for the VPSRAV intrinsics) to the
// already-defined instructions, covering unmasked, merge-masked (vselect with
// $src0) and zero-masked (vselect with all-zeros) forms, reg and mem each.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
               _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
               _.RC:$src1, addr:$src2)>;
    // Merge-masked: elements where the mask is clear keep $src0.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    // Zero-masked: elements where the mask is clear become zero.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
6119
// Extends the intrinsic lowering above with the broadcast-load (rmb) forms;
// used only for the d/q element types, which have broadcast variants.
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
               _.RC:$src1, addr:$src2)>;
    // Merge-masked broadcast form.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    // Zero-masked broadcast form.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
6142
// Instantiate the VPSRAV intrinsic lowerings: word forms without broadcast,
// dword/qword forms with broadcast.
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
6152
6153// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable-count left rotates: widen both sources to 512 bits, rotate with
  // the Z-form instruction, then extract the original-width subregister.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count left rotates, widened the same way.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
6203
6204// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable-count right rotates: same widening scheme as the VPROL block
  // above, using the VPRORV*Zrr instructions.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count right rotates.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
6254
6255//===-------------------------------------------------------------------===//
// 1-src variable and immediate permutation VPERMB/W/D/Q/PS/PD
6257//===-------------------------------------------------------------------===//
6258
// 1-source variable permute for 32/64-bit element types (VPERMD/Q/PS/PD).
// Reuses the variable-shift instruction classes (reg/reg, reg/mem, and
// broadcast-mem via avx512_var_shift_mb). Only 512-bit (Z) and 256-bit (Z256)
// variants are defined; the 256-bit form additionally requires VLX.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}
6269
// 1-source immediate permute (VPERMQ/VPERMPD with an imm8 control). Reuses the
// immediate-shift instruction classes (reg, mem, and broadcast-mem forms).
// As above, only 512-bit and 256-bit (VLX) variants exist.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
6284
// 1-source variable permute for byte/word element types (VPERMB/VPERMW).
// 'prd' is the gating feature predicate supplied at instantiation (e.g.
// HasBWI or HasVBMI below); the 256/128-bit forms additionally require VLX.
// No broadcast-memory form: byte/word ops do not support embedded broadcast.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}
6298
// Variable permutes: VPERMW needs BWI, VPERMB needs VBMI; the dword/qword and
// FP variants need only AVX512F (plus VLX for the 256-bit forms).
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6312
// Immediate-controlled permutes (imm8 selector). These share the VPERMQ /
// VPERMPD names with the variable forms above but use distinct opcodes.
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6319
6320//===----------------------------------------------------------------------===//
6321// AVX-512 - VPERMIL
6322//===----------------------------------------------------------------------===//
6323
// Variable VPERMILPS/PD for one vector width: the control vector comes from a
// register (rr), from memory (rm), or as a broadcast scalar (rmb). '_'
// describes the data vector, 'Ctrl' the (integer) control vector of matching
// width. All forms are maskable.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast form: a single scalar control element is splatted to all lanes.
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
}
6352
// Instantiates the variable-VPERMIL forms for all three vector widths:
// 512-bit under AVX512F, 128/256-bit additionally under VLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6368
// Defines both VPERMIL flavors under one name: the variable-control form
// (OpcVar, X86VPermilpv) and the immediate-control form (OpcImm, X86VPermilpi,
// built from the immediate-shift classes).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
6377
// VPERMILPS uses i32 control vectors, VPERMILPD i64 control vectors.
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
6384
6385//===----------------------------------------------------------------------===//
6386// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6387//===----------------------------------------------------------------------===//
6388
// Immediate shuffles, built from the immediate-shift classes: VPSHUFD on
// dwords, VPSHUFHW/VPSHUFLW on the high/low words of each 128-bit lane.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
6398
6399//===----------------------------------------------------------------------===//
6400// AVX-512 - VPSHUFB
6401//===----------------------------------------------------------------------===//
6402
// VPSHUFB (byte shuffle with a variable control vector) for all widths:
// 512-bit under BWI, 128/256-bit additionally under VLX.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                              EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}
6416
// VPSHUFB; VEX_WIG: the W bit is ignored by hardware for this encoding.
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;
6419
6420//===----------------------------------------------------------------------===//
6421// Move Low to High and High to Low packed FP Instructions
6422//===----------------------------------------------------------------------===//
6423
// VMOVLHPS: merge the low 64 bits of $src2 into the high half of $src1.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// VMOVHLPS: merge the high 64 bits of $src2 into the low half of $src1.
// NOTE(review): marked isCommutable, mirroring the SSE definition — confirm
// the commuted form is handled by custom lowering/commuting hooks.
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6435
6436//===----------------------------------------------------------------------===//
6437// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6439//===----------------------------------------------------------------------===//
6440
// Load form of VMOVHPS/PD and VMOVLPS/PD: loads a 64-bit scalar from memory
// and combines it with $src1 via OpNode. OpNode may be null_frag, in which
// case only the instruction (no ISel pattern through it) is produced.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                     (OpNode _.RC:$src1,
                       (_.VT (bitconvert
                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
}
6455
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
// VMOVHPD selects through X86Unpckl, VMOVLPD through X86Movsd; the PS
// variants are instruction-only (null_frag).
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6466
let Predicates = [HasAVX512] in {
  // VMOVHPD load pattern: unpckl of a reg with a scalar loaded as the low
  // element selects the reg/mem form.
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
           (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
}
6473
// Store forms: VMOVHPS/PD store the high 64 bits of the source register,
// VMOVLPS/PD store the low 64 bits. The "high" patterns extract element 0 of
// an unpckh (i.e. the original high half) before storing.
let SchedRW = [WriteFStore] in {
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                                (bc_v2f64 (v4f32 VR128X:$src))),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (v2f64 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
6503
let Predicates = [HasAVX512] in {
  // VMOVHPD store pattern: storing element 0 of a vpermilpd(src, 1) (i.e.
  // the high element of src) matches the store form.
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
6511//===----------------------------------------------------------------------===//
6512// FMA - Fused Multiply Operations
6513//
6514
// Packed FMA, 213 operand order: the tied operand $src1 appears as the middle
// OpNode operand. Produces register (r), full-memory (m), and broadcast
// (mb) forms; $src1 is tied to $dst in all of them.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast form: $src3 is a scalar splatted to all elements.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6540
// 213-form FMA with an explicit static rounding-mode operand ($rc);
// EVEX_B + EVEX_RC encode the rounding control. 512-bit only (see caller).
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6551
// Instantiates the 213-form packed FMA for all widths. Only the 512-bit
// variant gets the rounding-mode form; 128/256-bit variants require VLX.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6571
// Expands a 213-form FMA into its single- and double-precision variants.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6580
// 213-form packed FMA family: each pairs a plain SDNode with its
// rounding-mode counterpart.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6587
6588
// Packed FMA, 231 operand order: the tied operand $src1 appears as the last
// OpNode operand (the accumulator). Same three forms as the 213 variant.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast form: $src3 is a scalar splatted to all elements.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                      _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6615
// 231-form FMA with an explicit static rounding-mode operand ($rc).
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6627
// Instantiates the 231-form packed FMA for all widths; structure mirrors
// avx512_fma3p_213_common (rounding form only at 512 bits, VLX for smaller).
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6647
// Expands a 231-form FMA into its single- and double-precision variants.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6656
// 231-form packed FMA family.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6663
// Packed FMA, 132 operand order: the tied operand $src1 appears as the first
// OpNode operand in the register form. The memory forms use the equivalent
// 312 order (see comments below) to keep the patterns distinct.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                       _.RC:$src1, _.RC:$src2)), 1, 0>,
         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6693
// 132-form FMA with an explicit static rounding-mode operand ($rc).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6705
// Instantiates the 132-form packed FMA for all widths; structure mirrors
// the 213/231 _common multiclasses above.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6725
// Expands a 132-form FMA into its single- and double-precision variants.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6734
// 132-form packed FMA family.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6741
6742// Scalar FMA
// Scalar FMA building block. The *_Int forms (r_Int/m_Int/rb_Int) are the
// maskable intrinsic-style instructions and carry no ISel pattern here
// (null_frag). The isCodeGenOnly r/m/rb forms operate on scalar FP register
// classes and take their selection patterns from the caller:
//   RHS_r - pattern for the reg-reg form,
//   RHS_m - pattern for the reg-mem form,
//   RHS_b - pattern for the static-rounding form,
//   MaskOnlyReg - when set, suppresses RHS_r/RHS_b (no pattern emitted).
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;

    def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                     Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
6783
// Builds all three scalar FMA operand orders (213, 231, 132) for one element
// type, supplying the selection patterns for each order's codegen-only forms.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve
                // passthrough semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 imm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                          _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 imm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                 _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 imm:$rc)))), 1>;
  }
}
6817
// Top-level entry for one scalar FMA mnemonic family: instantiates the SS
// (f32) and SD (f64) variants of the 213/231/132 forms.  The SD variant
// additionally carries VEX_W to select the 64-bit element size.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
6829
// Scalar FMA instruction definitions.  Opcodes are the 213/231/132 encodings
// respectively for each mnemonic.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
6834
// Selection patterns that match a scalar FMA whose result is inserted back
// into a vector via a Move (X86Movss/X86Movsd) node, and map it onto the
// "_Int" (intrinsic-style, VR128X-operand) instruction forms.  The 213, 231
// and 132 forms are chosen by which operand of Op carries the extracted
// element of $src1, so $src1 can serve as both passthru and FMA input.
// Groups below: unmasked, merge-masked (X86selects with $src1 fallback),
// zero-masked (X86selects with ZeroFP fallback), then the same with
// explicit rounding mode (RndOp).
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // Unmasked register and memory patterns.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked patterns: on a zero mask bit the element of $src1 is kept.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked patterns: on a zero mask bit the element becomes +0.0.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
  }
}
7036
// Instantiate the insert-into-vector scalar FMA patterns for every mnemonic,
// once for SS (f32 element of v4f32) and once for SD (f64 element of v2f64).
defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
7054
7055//===----------------------------------------------------------------------===//
7056// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7057//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// Defines one width of the IFMA (vpmadd52) instructions: register, memory,
// and broadcast-memory forms, each with the standard masking variants via
// AVX512_maskable_3src.  $src1 is tied to $dst and supplies the addend.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  // Register-memory form ($src3 loaded from memory).
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast-memory form ($src3 is a scalar broadcast, EVEX.b set).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
} // Constraints = "$src1 = $dst"
7087
// Instantiates the vpmadd52 forms at all three vector widths.  The 512-bit
// variant only needs IFMA; the 128/256-bit variants additionally need VLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
7101
// IFMA: 52-bit multiply, accumulating the low (LUQ) / high (HUQ) 52 bits of
// the product into 64-bit lanes.
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
7108
7109//===----------------------------------------------------------------------===//
7110// AVX-512  Scalar convert from sign integer to float/double
7111//===----------------------------------------------------------------------===//
7112
// Scalar int->fp convert (cvtsi2ss/sd family): register and memory source
// forms.  The rr/rm forms use FP register classes and have no patterns
// (selection uses the explicit Pats further below); the "_Int" forms use
// VR128X and match the rounding-aware OpNode for intrinsic lowering.
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  } // hasSideEffects = 0
  let isCodeGenOnly = 1 in {
    // Intrinsic forms: operate on the full 128-bit register so the upper
    // elements of $src1 are passed through.
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 FROUND_CURRENT)))]>,
                 EVEX_4V, Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 (ld_frag addr:$src2),
                                 (i32 FROUND_CURRENT)))]>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  }//isCodeGenOnly = 1
}
7147
// Embedded-rounding variant of the scalar int->fp convert: register source
// only, with an explicit AVX512RC rounding-control operand (EVEX.b set).
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 imm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7161
// Combines the plain and embedded-rounding int->fp convert forms under one
// name.  Used when the instruction supports static rounding (register-source
// only); VCVTUSI2SDZ skips this and uses avx512_vcvtsi directly.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm>, VEX_LIG;
}
7170
let Predicates = [HasAVX512] in {
// Signed int -> float/double conversions for 32- and 64-bit GPR sources.
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
                                 XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// AT&T-syntax aliases: a suffix-less mnemonic with a memory source defaults
// to the 32-bit memory form.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

// Selection patterns for plain sint_to_fp; $src1 is a don't-care passthru so
// IMPLICIT_DEF is used.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned int -> float/double conversions (AVX-512 only instructions).
// NOTE(review): VCVTUSI2SDZ uses avx512_vcvtsi (no rrb form) — i32->f64 is
// exact, so an embedded-rounding variant would be pointless.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd{l}">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
7244
7245//===----------------------------------------------------------------------===//
7246// AVX-512  Scalar convert from float/double to integer
7247//===----------------------------------------------------------------------===//
7248
// Scalar float/double -> integer convert with current and embedded rounding:
// rr_Int (current rounding), rrb_Int (explicit $rc, EVEX.b), and rm_Int
// (memory source).  aliasStr supplies the AT&T size suffix ({l}/{q}) for the
// InstAliases.  CodeGenOnly=1 hides the memory form from the disassembler;
// the _aliases subclass below clears it and adds a memory alias.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr,
                                  bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
                      (i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

    // AT&T aliases carrying the explicit operand-size suffix.
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
    def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
            (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  } // Predicates = [HasAVX512]
}
7278
// Variant of avx512_cvt_s_int_round that exposes the memory form to the
// assembler/disassembler (CodeGenOnly = 0) and adds its AT&T alias.  Used
// for the unsigned converts, which have no SSE memory-form counterpart.
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
                                          X86VectorVTInfo DstVT, SDNode OpNode,
                                          X86FoldableSchedWrite sched, string asm,
                                          string aliasStr> :
  avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
  let Predicates = [HasAVX512] in {
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                            SrcVT.IntScalarMemOp:$src), 0, "att">;
  } // Predicates = [HasAVX512]
}
7290
// Convert float/double to signed/unsigned int 32/64.
// Opcode 0x2D is the signed convert (shared with SSE); 0x79 is the
// AVX-512-only unsigned convert, which therefore also gets memory aliases.
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7316
// The SSE version of these instructions are disabled for AVX512.
// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Register and folded-load forms are covered for each of the four
// cvtss2si/cvtsd2si intrinsic widths.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
            (VCVTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
            (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
            (VCVTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
            (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
            (VCVTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
            (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
            (VCVTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
            (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
} // HasAVX512
7337
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions.
// Each pattern folds "insert converted scalar into low element via movss/movsd"
// into the corresponding *_Int convert instruction, which already merges the
// result into the low element of $dst.
let Predicates = [HasAVX512] in {
// Signed i64/i32 -> f32 (register and folded-load operands).
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

// Signed i64/i32 -> f64.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned i64/i32 -> f32 (AVX512-only vcvtusi2ss forms).
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned i64/i32 -> f64 (AVX512-only vcvtusi2sd forms).
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
7421
// Convert float/double to signed/unsigned int 32/64 with truncation.
// _SrcRC describes the FP source, _DstRC the integer GPR destination.
// CodeGenOnly controls whether rm_Int is visible to the assembler/disassembler
// (the unsigned variants clear it and add their own memory-form alias).
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                            string aliasStr, bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
  // Non-intrinsic forms operating on FR32X/FR64X; used for ISel of plain
  // fp_to_sint/fp_to_uint, hidden from the assembler.
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, Sched<[sched.Folded, ReadAfterLd]>;
  }

  // Intrinsic forms operating on the full XMM register, with current rounding.
  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
           [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                 (i32 FROUND_CURRENT)))]>,
           EVEX, VEX_LIG, Sched<[sched]>;
  // {sae} form: suppress-all-exceptions (EVEX.b set on a register operand).
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                  (i32 FROUND_NO_EXC)))]>,
                                  EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
  let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNodeRnd
                                     (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
                                     (i32 FROUND_CURRENT)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

  // AT&T aliases carrying the explicit size suffix (e.g. "{l}"/"{q}").
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
} //HasAVX512
}
7464
// Unsigned variant of avx512_cvt_s_all: passes CodeGenOnly = 0 so the rm_Int
// memory form is assembler-visible, and adds the AT&T suffixed memory alias
// for it.
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
                                     X86VectorVTInfo _SrcRC,
                                     X86VectorVTInfo _DstRC, SDNode OpNode,
                                     SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                     string aliasStr> :
  avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
                   aliasStr, 0> {
let Predicates = [HasAVX512] in {
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
}
7478
// Truncating scalar FP -> signed int conversions (vcvttss2si / vcvttsd2si).
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
                        VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
                        VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating scalar FP -> unsigned int conversions (vcvttss2usi / vcvttsd2usi,
// AVX512-only encodings).
defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
                        XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7504
// The SSE versions of these instructions are disabled for AVX512, so the SSE
// truncating-convert intrinsics are mapped onto the AVX512 instructions.
// The memory-form patterns forward the sse_load_f32/f64 complex-pattern
// operand bound on the match side, consistent with the non-truncating
// cvtss2si/cvtsd2si patterns above (previously these mixed in the raw
// ssmem/sdmem operand names on the result side).
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
            (VCVTTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
            (VCVTTSS2SIZrm_Int sse_load_f32:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
            (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
            (VCVTTSS2SI64Zrm_Int sse_load_f32:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
            (VCVTTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
            (VCVTTSD2SIZrm_Int sse_load_f64:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
            (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
            (VCVTTSD2SI64Zrm_Int sse_load_f64:$src)>;
} // HasAVX512
7523
7524//===----------------------------------------------------------------------===//
7525// AVX-512  Convert form float to double and back
7526//===----------------------------------------------------------------------===//
7527
// Scalar FP <-> FP conversion (e.g. sd2ss / ss2sd). _ is the destination type
// info, _Src the source. Emits maskable intrinsic forms (register + folded
// load) plus codegen-only non-intrinsic FR register forms.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.ScalarIntMemCPat:$src2),
                                  (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern-less FR-register forms used for ISel of plain fpextend/fpround;
  // hidden from the assembler.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
7559
// Scalar Conversion with SAE - suppress all exceptions (EVEX.b, no rounding
// mode override; used by ss2sd where the result is exact).
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2),
                                         (i32 FROUND_NO_EXC)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
7572
// Scalar Conversion with rounding control (RC) - static rounding mode encoded
// via EVEX.b + EVEX.RC on a register operand (used by sd2ss).
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// vcvtsd2ss wrapper: combines the plain scalar conversion with the
// rounding-control form (f64 -> f32 rounds, so RC applies).
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                  SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}
7594
// vcvtss2sd wrapper: combines the plain scalar conversion with the SAE form
// (f32 -> f64 is exact, so only exception suppression applies, not RC).
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Instantiate the scalar f64<->f32 conversions.
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                         X86froundRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                          X86fpextRnd, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;

// Select the codegen-only FR forms for plain fpextend/fpround; the unused
// first operand is tied off with IMPLICIT_DEF.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// When optimizing for speed, load with vmovss first so the convert has a
// register source (avoids a partial-memory-operand dependency).
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

// Fold "convert low element, then movss/movsd into $dst" into the _Int forms,
// which already merge into the low element.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
7643
7644//===----------------------------------------------------------------------===//
7645// AVX-512  Vector convert from signed/unsigned integer to float/double
7646//          and from float/double to signed/unsigned integer
7647//===----------------------------------------------------------------------===//
7648
// Generic vector conversion: register, full-width memory, and broadcast
// memory forms, all maskable. Broadcast/Alias/MemOp defaults are overridden
// by callers whose source element count differs from the destination's
// (e.g. 4xf32 -> 2xf64 reads only 64 bits).
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
                         (_.VT (OpNode (_Src.VT
                             (bitconvert (_Src.LdFrag addr:$src)))))>,
                         EVEX, Sched<[sched.Folded]>;

  // Broadcast a single scalar element from memory, then convert.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            ))>, EVEX, EVEX_B,
                         Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions (register-only EVEX.b form).
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                               (i32 FROUND_NO_EXC)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}
7685
// Conversion with rounding control (RC) - static rounding mode via
// EVEX.b + EVEX.RC on a register operand.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
                         X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7696
// Extend Float to Double (vcvtps2pd). The 512-bit form also gets SAE; the
// 128-bit form converts only 2 of 4 f32 elements, hence the explicit
// "{1to2}" broadcast string and 64-bit memory operand.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               sched.YMM>, EVEX_V256;
  }
}
7713
// Truncate Double to Float (vcvtpd2ps). 512-bit gets rounding control.
// The 128/256-bit memory forms share a v4f32 destination, so "x"/"y" mnemonic
// suffixes (and explicit broadcast strings) disambiguate them for the asm
// parser; the aliases below accept the suffixed spellings.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
7737
// Instantiate the packed f64<->f32 conversions.
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;

// An extending load of 8 x f32 is exactly vcvtps2pd from memory.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;

let Predicates = [HasVLX] in {
  // vcvtpd2ps on 128-bit sources already zeroes the upper half of the
  // result, so an explicit vzmovl is redundant.
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
              (VCVTPS2PDZ128rm addr:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
              (VCVTPS2PDZ256rm addr:$src)>;
}
7758
// Convert Signed/Unsigned Doubleword to Double. OpNode128 is a separate node
// for the 128-bit form, which consumes only 2 of the 4 i32 source elements
// (hence the "{1to2}" broadcast string and 64-bit memory operand).
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7774
// Convert Signed/Unsigned Doubleword to Float. Same element counts on both
// sides, so no broadcast/memory-operand overrides are needed; 512-bit gets
// rounding control.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7791
// Convert Float to Signed/Unsigned Doubleword with truncation. Truncating
// conversions take SAE (no rounding-mode choice) on the 512-bit form.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7808
// Convert Float to Signed/Unsigned Doubleword (non-truncating; rounds per
// the current or statically-encoded rounding mode on the 512-bit form).
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7825
// Convert Double to Signed/Unsigned Doubleword with truncation. 512-bit form
// gets SAE.
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    // Accept the suffixed mnemonics for the unambiguous register forms too.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
7855
// Convert Double to Signed/Unsigned Doubleword (non-truncating). 512-bit form
// gets rounding control.
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    // Accept the suffixed mnemonics for the unambiguous register forms too.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
7885
// Convert Double to Signed/Unsigned Quadword (AVX512DQ). 512-bit form gets
// rounding control.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7902
// Convert Double to Signed/Unsigned Quadword with truncation (AVX512DQ).
// 512-bit form gets SAE.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7919
// Convert Signed/Unsigned Quadword to Double (AVX512DQ). 512-bit form gets
// rounding control. The VLX forms have no VEX equivalent, hence
// NotEVEX2VEXConvertible.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
7936
7937// Convert Float to Signed/Unsigned Quardword
// Multiclass for the packed-float -> packed-quadword conversions,
// instantiated below for both the signed (vcvtps2qq) and unsigned
// (vcvtps2uqq) variants.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // The 512-bit form needs only DQI and additionally gets an explicit
  // rounding-control variant via avx512_vcvt_fp_rc.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source; the memory form therefore loads only 64 bits
    // (f64mem).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7955
// Convert Float to Signed/Unsigned Quadword with truncation
// Multiclass for the truncating packed-float -> packed-quadword conversions,
// instantiated below for both the signed (vcvttps2qq) and unsigned
// (vcvttps2uqq) variants.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // The 512-bit form needs only DQI and additionally gets a SAE variant.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source; the memory form therefore loads only 64 bits
    // (f64mem).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7973
// Convert Signed/Unsigned Quadword to Float
// Multiclass for the packed-quadword -> packed-float conversions,
// instantiated below for both the signed (vcvtqq2ps) and unsigned
// (vcvtuqq2ps) variants. A separate node (OpNode128) is taken for the
// 128-bit form, whose result only occupies the low half of the XMM register.
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // The 512-bit form needs only DQI and additionally gets an explicit
  // rounding-control variant via avx512_vcvt_fp_rc.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in the asm parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128,
                               NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Accept the "x"/"y"-suffixed mnemonics: register forms in AT&T syntax,
    // memory forms in Intel syntax (where the operand size is otherwise
    // ambiguous).
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
8006
// Instantiations of the dword <-> fp conversion multiclasses. Each defm
// supplies opcode, mnemonic, DAG node(s), rounding node and scheduling
// class, and appends the prefix bytes (PS/PD/XS/XD), VEX_W and the
// EVEX_CD8 tuple/scale information here.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                PS, EVEX_CD8<32, CD8VF>;

// Truncating fp -> signed/unsigned dword conversions.
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
                                XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;

// Unsigned dword -> fp conversions.
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                 EVEX_CD8<32, CD8VF>;

// Rounding (current-rounding-mode) fp -> signed/unsigned dword conversions.
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PS, EVEX_CD8<64, CD8VF>;
8053
// Instantiations of the quadword conversion multiclasses (these multiclasses
// gate their members on HasDQI / HasDQI+HasVLX internally).
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

// Truncating fp -> signed/unsigned quadword conversions.
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

// Signed/unsigned quadword -> fp conversions.
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                            EVEX_CD8<64, CD8VF>;
8101
// Select the generic fp_to_sint/fp_to_uint nodes (register and load forms)
// to the truncating conversion instructions for the 512-bit source types.
let Predicates = [HasAVX512] in  {
  def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
            (VCVTTPS2DQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
            (VCVTTPS2DQZrm addr:$src)>;

  def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
            (VCVTTPS2UDQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
            (VCVTTPS2UDQZrm addr:$src)>;

  // v8f64 narrows to v8i32 (the result fits in a YMM register).
  def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2DQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2DQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UDQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UDQZrm addr:$src)>;
}
8123
// Same fp_to_sint/fp_to_uint selection for the 128/256-bit types, which
// need the VLX-encoded (Z128/Z256) instruction forms.
let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2DQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQZ128rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UDQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UDQZ128rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2DQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQZ256rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UDQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UDQZ256rm addr:$src)>;

  // v4f64 narrows to v4i32 (the result fits in an XMM register).
  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2DQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UDQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UDQZ256rm addr:$src)>;
}
8155
// fp -> quadword conversions: the 512-bit forms need only AVX512DQ.
let Predicates = [HasDQI] in {
  // v8f32 (YMM source) widens to a v8i64 (ZMM) result.
  def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2QQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UQQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UQQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2QQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UQQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UQQZrm addr:$src)>;
}
8177
// fp -> quadword conversions, 128/256-bit forms: need AVX512DQ plus VLX.
let Predicates = [HasDQI, HasVLX] in {
  // v4f32 (XMM source) widens to a v4i64 (YMM) result.
  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2QQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UQQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UQQZ256rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
            (VCVTTPD2QQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
            (VCVTTPD2QQZ128rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
            (VCVTTPD2UQQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
            (VCVTTPD2UQQZ128rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UQQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UQQZ256rm addr:$src)>;
}
8209
// Without VLX there are no 128/256-bit encodings of the unsigned
// conversions, so implement them by widening the operand into a ZMM
// register (INSERT_SUBREG over IMPLICIT_DEF), running the 512-bit
// instruction, and extracting the relevant subregister of the result.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
8246
// Fold an explicit zero-upper-elements move (X86vzmovl) around the 128-bit
// pd -> dq conversions into the conversion instruction itself -- the
// narrowing conversion only produces two i32 results, so the upper elements
// are presumably already zero (NOTE: relies on the instruction's zeroing
// behavior, which is not visible here).
let Predicates = [HasAVX512, HasVLX] in {
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;

  // Fold a 64-bit scalar load (plain or zero-extending) feeding the low two
  // i32 lanes of a dq -> pd conversion into the memory form.
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
8277
// Fold loads into the 512-bit fp round (f64 -> f32) and fp extend
// (f32 -> f64) conversions.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
8284
// As above: fold the explicit zero-upper-elements move around the 128-bit
// qq -> ps conversions into the conversion instruction.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                              (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                              (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
}
8293
// With DQI but without VLX, the 128/256-bit quadword conversions are
// implemented by widening to the 512-bit instruction and extracting the
// relevant subregister of the result (same technique as the NoVLX block
// above).
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
8355
8356//===----------------------------------------------------------------------===//
8357// Half precision conversion instructions
8358//===----------------------------------------------------------------------===//
8359
// vcvtph2ps: packed half-precision -> single-precision conversion, with
// masking support (AVX512_maskable).
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  // Register form.
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  // Memory form: the source is loaded with ld_frag and bitcast to the
  // source vector type before conversion.
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (bitconvert
                                           (ld_frag addr:$src))))>,
                            T8PD, Sched<[sched.Folded]>;
}
8374
// SAE (suppress-all-exceptions) variant of vcvtph2ps; register source only,
// selected with EVEX.B set.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}
8384
// 512-bit vcvtph2ps (v16i16 -> v16f32), including its SAE form.
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8390
// 128/256-bit vcvtph2ps forms (VLX), plus patterns folding a scalar i64
// load of the 4 half-precision source elements into the 128-bit memory form.
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                       loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                       loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load (zero-extended load,
  // vzmovl'd load, or plain scalar_to_vector load).
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
8408
// vcvtps2ph: packed single-precision -> half-precision conversion. The
// immediate ($src2) selects the rounding mode.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  // Register form, maskable.
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                   (X86cvtps2ph (_src.VT _src.RC:$src1),
                                (i32 imm:$src2)), 0, 0>,
                   AVX512AIi8Base, Sched<[RR]>;
  // Store forms are declared without patterns (selection happens through
  // the separate Pat<> definitions below in this file).
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    // Masked store form.
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
8428
// SAE variant of vcvtps2ph: assembler-only (asm-maskable, empty pattern
// list), register destination, EVEX.B set.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
8438
// vcvtps2ph instantiations plus the patterns selecting the store forms.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  // The 128-bit form only produces 64 bits of result: match a store of the
  // low f64/i64 element of the bitcast result to the mr (store) form.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  // Full-width stores for the 256- and 512-bit forms.
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}
8466
8467// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  // Scalar f32 -> f16: convert through the 128-bit vcvtps2ph and extract
  // the low 16 bits of the result.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  // Scalar f16 -> f32: widen the GPR to 32 bits, move into a vector
  // register and convert through the 128-bit vcvtph2ps.
  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;

  // Round-trip f32 -> f16 -> f32 without going through a GPR.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
8487
//  Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// SAE variants of the scalar compare-and-set-EFLAGS instructions
// (vcomiss/vcomisd/vucomiss/vucomisd). Declared with an empty pattern list
// (hasSideEffects = 0) -- register form only, EVEX.B selects SAE.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                            string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
8496
// SAE forms of the ordered/unordered scalar compares. All write EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
8507
// EVEX-encoded versions of the scalar ordered/unordered compares, reusing
// the SSE1/2 multiclasses. All write EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register (FR32X/FR64X) forms, selected from the X86cmp node.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                  "ucomisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // vcomiss/vcomisd are declared with their pattern lists cleared; they are
  // not selected through this path.
  let Pattern = []<dag> in {
    defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                   "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                   "comisd", WriteFCom>, PD, EVEX,
                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  // Codegen-only vector-register (VR128X) versions used by the
  // X86ucomi/X86comi intrinsic nodes; memory forms use the sse_load frags.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                          sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                          sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                          sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                          sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
8539
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal / reciprocal-sqrt approximations,
// maskable, with register-register and register-memory forms.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  // Register form: result combines $src1 (pass-through upper elements)
  // with the approximation of $src2.
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, Sched<[sched]>;
  // Memory form: $src2 is a scalar intrinsic memory operand.
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          _.ScalarIntMemCPat:$src2)>, EVEX_4V,
                          Sched<[sched.Folded, ReadAfterLd]>;
}
}
8557
// Scalar reciprocal and reciprocal-square-root estimates (14-bit precision).
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
8570
8571/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 2^-14-relative-error estimate instructions, for one vector width
// (_ supplies the VT/register class). Emits register, full-vector-load and
// broadcast-load forms, each with the standard masking variants.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  // Register form.
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                         Sched<[sched]>;
  // Full-vector load form.
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                         Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast form (EVEX.b): one scalar element is loaded and splatted.
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (OpNode (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
8592
// Instantiate the packed 14-bit estimate forms for every vector width:
// 512-bit with baseline AVX512F, 128/256-bit behind the AVX512VL predicate.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, sched.XMM,
                                v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, sched.YMM,
                                v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, sched.XMM,
                                v2f64x_info>, EVEX_V128, VEX_W,
                                EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, sched.YMM,
                                v4f64x_info>, EVEX_V256, VEX_W,
                                EVEX_CD8<64, CD8VF>;
  }
}
8616
// Packed reciprocal and reciprocal-square-root estimates (14-bit precision).
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8619
8620/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 2^-28-relative-error estimate instructions. Emits three masked
// forms: reg-reg under the current rounding mode, reg-reg with {sae}
// (suppress-all-exceptions, EVEX.b), and the memory-folded form.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  // Reg-reg, current rounding mode.
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT))>,
                           Sched<[sched]>;

  // Reg-reg with {sae}: FP exceptions suppressed (EVEX.b set).
  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;

  // Memory-folded scalar source.
  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                         (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
8646
// Instantiate the scalar 28-bit estimate skeleton for both element widths
// ("ss" = f32, "sd" = f64).
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
               EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
               EVEX_CD8<64, CD8VT1>, VEX_W;
}
8654
// VRCP28/VRSQRT28 scalar estimates require the AVX-512 ER feature.
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
                              T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

// VGETEXPSS/SD reuse the same scalar skeleton but deliberately carry no
// HasERI predicate (note it sits outside the let above).
defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8664/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8665
// Packed 2^-28-relative-error estimate instructions for one vector width.
// Register, full-vector-load and broadcast-load forms, all evaluated under
// the current rounding mode (the {sae} form lives in avx512_fp28_p_round).
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  // Register form.
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;

  // Full-vector load form.
  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))),
                          (i32 FROUND_CURRENT))>,
                          Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast form (EVEX.b): one scalar element loaded and splatted.
  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                                 (i32 FROUND_CURRENT))>, EVEX_B,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// {sae} (suppress-all-exceptions) register variant of avx512_fp28_p.
// EVEX.b on a reg-reg form encodes SAE rather than broadcast.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
                        EVEX_B, Sched<[sched]>;
}
8699
// 512-bit packed 28-bit estimate instructions: normal forms plus the {sae}
// round variant. Only ZMM widths exist for the ER instructions.
multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
8709
// 128/256-bit packed forms of the fp28 skeleton (no {sae} variant), gated on
// AVX512VL. Used for VGETEXP, whose narrow forms do exist, unlike the ERI ops.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
                                     EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
                                     EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
                                     EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
                                     EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
8724
// Packed 28-bit estimates and VEXP2 require AVX-512 ER.
let Predicates = [HasERI] in {
 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
}
// VGETEXP is baseline AVX512F: 512-bit forms via avx512_eri, 128/256-bit
// forms (HasVLX) via avx512_fp_unaryop_packed.
defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
                                          SchedWriteFRnd>, EVEX;
8733
// Packed square root with an explicit static rounding mode operand ($rc);
// EVEX.b + EVEX.L'L encode the rounding control (EVEX_RC).
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
8742
// Packed square root selected from the generic fsqrt node, for one vector
// width. Register, full-vector-load and broadcast-load forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  // Register form.
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  // Full-vector load form.
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (fsqrt (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                           Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast form (EVEX.b).
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (fsqrt (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
8763
// Instantiate packed square root for all vector widths: ZMM with baseline
// AVX512F, XMM/YMM behind the AVX512VL predicate. PS/PD pick up separate
// scheduling classes from the X86SchedWriteSizes bundle.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps", sched.PS.ZMM,
                                v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd", sched.PD.ZMM,
                                v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps", sched.PS.XMM,
                                     v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps", sched.PS.YMM,
                                     v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd", sched.PD.XMM,
                                     v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd", sched.PD.YMM,
                                     v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
8788
// Static-rounding packed square root: only the 512-bit forms take an
// embedded rounding mode, so just ZMM instantiations here.
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", sched.PS.ZMM,
                                      v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", sched.PD.ZMM,
                                      v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
8798
// Scalar square root: intrinsic (_Int) forms plus codegen-only FRC forms,
// and the patterns that select the FRC forms from plain scalar fsqrt.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    // Intrinsic reg-reg form, current rounding mode.
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;
    // Intrinsic memory-folded form.
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                    _.ScalarIntMemCPat:$src2,
                                    (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
    // Intrinsic form with an explicit static rounding operand.
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 imm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    // Pattern-less FRC-register forms, selected only via the Pats below.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, ReadAfterLd]>;
    }
  }

  // Plain scalar fsqrt -> codegen-only register form; the pass-through
  // source operand is left undefined.
  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Fold the load only when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}
8849
// Instantiate the scalar square-root skeleton for f32 (XS prefix) and
// f64 (XD + VEX.W).
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
8857
// VSQRT: packed forms (with the ZMM static-rounding variants) and the
// scalar SS/SD forms.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
8862
// Scalar VRNDSCALE: intrinsic forms (with {sae} and memory variants),
// codegen-only FRC forms, and patterns lowering the generic rounding nodes
// (ffloor/fceil/ftrunc/frint/fnearbyint) to fixed immediates.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  // Intrinsic reg-reg form with an explicit imm8 rounding-control operand.
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 imm:$src3)))>,
                           Sched<[sched]>;

  // {sae} variant: FP exceptions suppressed (EVEX.b).
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
                         Sched<[sched]>;

  // Memory-folded scalar source.
  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern-less FRC forms, selected only via the Pats below.
  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>, Sched<[sched]>;

    let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>, Sched<[sched.Folded, ReadAfterLd]>;
  }
  }

  // Immediate encodings: low 2 bits select the rounding direction
  // (1=down, 2=up, 3=toward zero, 0=nearest-even), bit 2 selects MXCSR's
  // current mode, bit 3 (0x8) suppresses precision exceptions.
  let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0x9)))>;
    def : Pat<(fceil _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xa)))>;
    def : Pat<(ftrunc _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xb)))>;
    def : Pat<(frint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xc)))>;
  }

  // Load-folding variants of the same patterns; only when optimizing for
  // size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0x9)))>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xa)))>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xb)))>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xc)))>;
  }
}
8938
// Scalar round-to-integral with scaling.
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<64, CD8VT1>;
8948
// Select a masked scalar op: (movss/movsd dst, (select mask, op(src2),
// passthru)) becomes the masked intrinsic instruction. First pattern merges
// into $dst, second zeros on a false mask.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masking: false lanes keep the old $dst element.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    // Zero-masking: false lanes become ZeroFP.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
8966
// Masked scalar sqrt selection; the GR32 mask is truncated and copied into
// a VK1WM mask register.
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
8973
// Like avx512_masked_scalar, but for immediate-carrying ops (rndscale):
// the instruction gets the fixed imm8 ImmV appended.
multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
                                    X86VectorVTInfo _, PatLeaf ZeroFP,
                                    bits<8> ImmV, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masking form.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
               _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;

    // Zero-masking form.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
               VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
  }
}
8990
// Masked scalar floor/ceil via VRNDSCALE; imm 0x01 = round toward -inf,
// 0x02 = round toward +inf.
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x02, HasAVX512>;
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x02,  HasAVX512>;
8999
9000
9001//-------------------------------------------------
9002// Integer truncate and extend operations
9003//-------------------------------------------------
9004
// One VPMOV* truncate at a single vector width: the masked reg-reg form plus
// pattern-less store forms (plain and mask-predicated) that the
// avx512_trunc_mr_lowering patterns select.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in
  defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
                      (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
                      EVEX, T8XS, Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    // Truncating store.
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    // Masked truncating store (EVEX.z is not available on stores, hence
    // merge-masking only).
    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}
9026
// Patterns selecting (masked) truncating-store DAG nodes into the mr/mrk
// instructions defined by avx512_trunc_common.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  // Unmasked truncating store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                                    addr:$dst, SrcInfo.RC:$src)>;

  // Mask-predicated truncating store.
  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                                               (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
9041
// Emit one truncate instruction family across Z128/Z256/Z forms. Separate
// OpNode parameters per width allow the narrow forms to use an "in-vector"
// node when the natural truncate result would be too small a type.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  // Narrow forms need AVX512VL in addition to the base predicate.
  let Predicates = [HasVLX, prd] in {
    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                             truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
9068
// i64 -> i8 truncates (qb): every width produces fewer than 16 bytes, so all
// three use the in-vector node; CD8VO = octo-fraction memory tuple.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
9077
// i64 -> i16 truncates (qw): only the 512-bit form fills a full v8i16, so
// the 128/256-bit forms use the in-vector node.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
9086
// i64 -> i32 truncates (qd): only the 128-bit form needs the in-vector node.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
9095
// i32 -> i8 truncates (db): 128/256-bit forms use the in-vector node.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
9104
// i32 -> i16 truncates (dw): only the 128-bit form needs the in-vector node.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
9113
// i16 -> i8 truncates (wb): require AVX512BW (HasBWI predicate) since the
// sources are word vectors.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
9122
// Instantiate the truncating-move instructions.  Each source/destination
// width combination gets three variants: plain truncate (trunc/X86vtrunc),
// signed saturating truncate (X86vtruncs, "vpmovs*") and unsigned saturating
// truncate (X86vtruncus, "vpmovus*"), each with matching (masked) truncating
// store PatFrags.

// Qword -> byte.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// Qword -> word.
defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// Qword -> dword.
defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   trunc, WriteShuffle256,
                                  truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32>;

// Dword -> byte.
defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// Dword -> word.
defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// Word -> byte (requires BWI, see avx512_trunc_wb).
defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
9164
// Without VLX, the 256-bit truncations are implemented with the 512-bit
// instructions: widen the source into a ZMM register via INSERT_SUBREG,
// run the Z-form truncate, and extract the low XMM of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
9175
// Same widening trick for the word->byte truncation, which needs BWI for
// the 512-bit VPMOVWB instruction.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
9181
// Common skeleton for the sign/zero-extension moves (VPMOVSX*/VPMOVZX*):
// a register form applying OpNode and a memory form folding LdFrag, both
// with full AVX512 masking support.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  // Register-register extend: dst = OpNode(src).
  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  // Memory form: the extending load PatFrag subsumes OpNode.
  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}
9197
// Byte -> word extensions (VPMOV[SZ]XBW).  ExtTy ("s" or "z") selects the
// matching extloadvi8 PatFrag.  The 128-bit form uses InVecNode (the source
// occupies only part of the input register); the wider forms use OpNode.
// Requires BWI because the elements are bytes/words.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9216
// Byte -> dword extensions (VPMOV[SZ]XBD).  The source is a quarter of the
// destination width (CD8VQ memory tuple).
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9235
// Byte -> qword extensions (VPMOV[SZ]XBQ).  The source is an eighth of the
// destination width (CD8VO memory tuple).
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9254
// Word -> dword extensions (VPMOV[SZ]XWD).  Source is half the destination
// width (CD8VH memory tuple); LdFrag defaults to the extloadvi16 variant.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9273
// Word -> qword extensions (VPMOV[SZ]XWQ).  Source is a quarter of the
// destination width (CD8VQ memory tuple).
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9292
// Dword -> qword extensions (VPMOV[SZ]XDQ).  Source is half the destination
// width (CD8VH memory tuple).  Note: no VEX_WIG here, unlike the byte/word
// source variants above.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
9312
// Zero-extension instructions; "z" selects the zextloadvi* PatFrags.
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;

// Sign-extension instructions; "s" selects the sextloadvi* PatFrags.
defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
9326
9327
// Load-folding patterns for the VPMOVSX*/VPMOVZX* instructions.  OpcPrefix
// names the instruction family ("VPMOVSX"/"VPMOVZX"); ExtOp is the full-width
// extend node and InVecOp the in-vector variant used by the 128-bit forms.
// Each pattern recognizes a narrow load (scalar_to_vector, vzmovl/vzload, or
// a bitcast full-vector load) feeding the extend and selects the memory form
// of the corresponding instruction.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  // b -> d, 128-bit: loads 32 bits (4 bytes).
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  // b -> q, 128-bit: loads 16 bits (2 bytes) via an extending i16 load.
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  // w -> d, 128-bit.
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  // w -> q, 128-bit.
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  // d -> q, 128-bit.
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

  def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
9468
// Instantiate the load-folding patterns for both sign and zero extension.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
9471
9472//===----------------------------------------------------------------------===//
9473// GATHER - SCATTER Operations
9474
9475// FIXME: Improve scheduling of gather/scatter instructions.
// One masked-gather instruction.  Produces two results: the gathered vector
// ($dst, tied to $src1 for the pass-through/merge semantics) and the
// written-back mask ($mask_wb, tied to $mask; the hardware clears mask bits
// as elements complete).  $dst is earlyclobber because it must not alias the
// index register in $src2.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
9490
// Gathers with 64-bit data elements (pd/q): for each vector width, a
// dword-indexed (D) and a qword-indexed (Q) form.  All carry VEX_W for the
// 64-bit element size.  128/256-bit forms require VLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
9508
// Gathers with 32-bit data elements (ps/d).  Note the qword-indexed forms
// use the next-smaller data vector info (e.g. Z uses _.info256) because
// 64-bit indices halve the number of addressable elements; the Z128 Q form
// gathers only two elements and therefore uses a VK2WM mask.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9527
9528
// FP gathers (vgatherdps/qps/dpd/qpd) and integer gathers (vpgatherdd/qd/dq/qq).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9534
// One masked-scatter instruction.  Like gather, the mask is both read and
// written back ($mask = $mask_wb): completed elements clear their mask bits.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
9550
// Scatters with 64-bit data elements (pd/q); structure mirrors
// avx512_gather_q_pd above (D = dword indices, Q = qword indices, VEX_W
// for the 64-bit element size, 128/256-bit forms under VLX).
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
9568
// Scatters with 32-bit data elements (ps/d); structure mirrors
// avx512_gather_d_ps above, including the narrower data info for the
// qword-indexed forms and the VK2WM mask on the 2-element Z128 Q form.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mscatterv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9587
// FP scatters (vscatterdps/qps/dpd/qpd) and integer scatters (vpscatterdd/qd/dq/qq).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9593
// Gather/scatter prefetch instructions.
// One gather/scatter prefetch instruction (PFI feature).  Takes only a mask
// and a vector memory operand and has no pattern; marked both mayLoad and
// mayStore so neither loads nor stores are reordered across it.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}
9602
// Gather/scatter prefetch instantiations.  Hint level (0/1) selects the
// ModRM reg field (MRM1m/MRM2m for gather, MRM5m/MRM6m for scatter); the
// d/q infix selects dword vs. qword indices and the mask/memop widths.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9650
// Mask-to-vector move (VPMOVM2*): sign-extends each mask bit into a full
// vector element (all-ones or all-zeros).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}
9657
// Instantiate the mask-to-vector move at all three vector widths; the
// 128/256-bit forms additionally require VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
9668
// VPMOVM2{B,W} need BWI; VPMOVM2{D,Q} need DQI.  W/Q carry VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9673
// Vector-to-mask conversion (vpmov*2m). Matched as "0 > x" (X86pcmpgtm with
// an all-zeros LHS), i.e. the mask receives the sign bit of each element.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
9680
// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is placed into the low lanes of an undef 512-bit
// register (INSERT_SUBREG of IMPLICIT_DEF), the Z-suffixed instruction is
// used, and the resulting wide mask is re-classed down to the narrow KRC.
// The upper lanes are garbage, but only the low NumElts mask bits are read.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
9693
// Instantiate vpmov*2m at all widths: real 128/256-bit instructions when
// VLX is available, otherwise pattern-only lowerings that widen to the
// 512-bit instruction.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  // NoVLX fallback: NAME resolves to e.g. "VPMOVB2M" so the lowering can
  // !cast to the Z instruction defined above.
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
9711
// vpmov{b,w}2m require BWI; vpmov{d,q}2m require DQI. VEX_W selects the
// 16/64-bit element forms at each opcode.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
9720
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Expand via i32 elements: vpmovm2d to get v16i32, then truncate with
// vpmovdb/vpmovdw.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
9730
9731//===----------------------------------------------------------------------===//
9732// AVX-512 - COMPRESS and EXPAND
9733//
9734
// One width of vcompress/vpcompress:
//   rr  - maskable register form, matched via X86compress.
//   mr  - unmasked store form; no ISel pattern (codegen-neutral, assembler
//         only), hence mayStore/hasSideEffects overrides.
//   mrk - masked (compressing) store form; selected by the pattern in
//         compress_by_vec_width_lowering rather than by an inline pattern.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}
9756
// Select the masked compressing-store node onto the mrk instruction form
// defined by compress_by_vec_width_common.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                               (_.VT _.RC:$src)),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
9763
// Instantiate compress at 512 bits (base predicate) and, with VLX, at
// 256/128 bits; each width also gets its compressing-store pattern.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
9779
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer (vpcompress) and FP (vcompress) variants; VEX_W selects the
// 64-bit element forms.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
9789
// expand
// One width of vexpand/vpexpand: maskable register (rr) and load (rm)
// forms, both matched via X86expand. Masked expanding loads are handled
// separately by expand_by_vec_width_lowering.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, ReadAfterLd]>;
}
9805
// Select masked expanding loads: undef and all-zeros passthrus both map to
// the zero-masked (rmkz) form; a register passthru maps to the
// merge-masked (rmk) form.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
9821
// Instantiate expand at 512 bits (base predicate) and, with VLX, at
// 256/128 bits; each width also gets its expanding-load patterns.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
9837
// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer (vpexpand) and FP (vexpand) variants; VEX_W selects the
// 64-bit element forms.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
9847
//handle instruction  reg_vec1 = op(reg_vec,imm)
//                               op(mem_vec,imm)
//                               op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Unary packed-FP op with an 8-bit immediate (e.g. vrndscale/vreduce):
// register (rri), full-vector load (rmi) and element-broadcast (rmbi)
// forms, all maskable.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9875
//handle instruction  reg_vec1 = op(reg_vec,imm),{sae}
// {sae} variant of the unary packed-FP imm op: register-only form with
// suppress-all-exceptions encoded via EVEX_B and FROUND_NO_EXC passed to
// the rounding-aware SDNode.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
9890
// Instantiate the unary packed-FP imm op at all widths. Only the 512-bit
// form gets the {sae} variant; 128/256-bit forms (VLX) have no SAE
// encoding.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
9907
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Binary packed-FP op with an 8-bit immediate (e.g. vrange): register
// (rri), full-vector load (rmi) and element-broadcast (rmbi) forms, all
// maskable.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9939
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
// Three-operand op with i8 immediate where source and destination vector
// types may differ (separate SrcInfo/DestInfo); no broadcast form here.
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9963
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
// Same as avx512_3Op_rm_imm8 (inherited, with Src==Dest type) plus the
// element-broadcast (rmbi) form.
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
9981
//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                      op(reg_vec2,mem_scalar,imm)
// Scalar FP op with i8 immediate (e.g. vranges[sd]): register and
// scalar-load forms. The loaded scalar is wrapped in scalar_to_vector to
// feed the vector-typed SDNode.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
10004
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// {sae} variant of the binary packed-FP imm op: register-only, EVEX_B
// encodes suppress-all-exceptions, FROUND_NO_EXC is passed to the
// rounding-aware SDNode.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
10020
//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of avx512_fp_sae_packed_imm: register-only {sae}
// form, EVEX_B encodes suppress-all-exceptions, FROUND_NO_EXC is passed
// to the rounding-aware SDNode.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  // NOTE: previously written as "defm NAME#rrib"; inside a multiclass the
  // instantiation name is prepended automatically, so plain "rrib" yields
  // the same record names and matches the sibling multiclasses' style.
  defm rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
10035
// Instantiate the binary packed-FP imm op at all widths. Only the 512-bit
// form gets the {sae} variant; 128/256-bit forms (VLX) have no SAE
// encoding.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}
10052
// Instantiate the mixed-type 3-operand imm8 op at all widths (128/256-bit
// need VLX). Encoding attributes are applied per width here rather than by
// the caller.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
10067
// Instantiate the 3-operand imm8 op (with broadcast form) at all widths;
// 128/256-bit need VLX. Encoding attributes are supplied by the caller.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                                EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}
10082
// Scalar FP imm op plus its {sae} variant. Scalar ops have a single (XMM)
// width, so only the Z instantiation exists.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
  }
}
10091
// Expand a unary packed-FP imm op into its PS and PD flavors (separate
// opcodes, shared node/scheduling); PD forms take VEX_W and 64-bit CD8
// scaling.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
10102
// Packed unary FP-imm instructions: vreduce (DQI), vrndscale (distinct
// ps/pd opcodes), vgetmant.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;

// Packed binary FP-imm: vrange (DQI only).
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Scalar FP-imm counterparts (VEX_LIG: scalar ops ignore vector length).
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10142
10143
10144multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
10145  // Register
10146  def : Pat<(_.VT (ffloor _.RC:$src)),
10147            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10148             _.RC:$src, (i32 0x9))>;
10149  def : Pat<(_.VT (fnearbyint _.RC:$src)),
10150            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10151             _.RC:$src, (i32 0xC))>;
10152  def : Pat<(_.VT (fceil _.RC:$src)),
10153            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10154             _.RC:$src, (i32 0xA))>;
10155  def : Pat<(_.VT (frint _.RC:$src)),
10156            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10157             _.RC:$src, (i32 0x4))>;
10158  def : Pat<(_.VT (ftrunc _.RC:$src)),
10159            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10160             _.RC:$src, (i32 0xB))>;
10161
10162  // Merge-masking
10163  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
10164            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10165             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
10166  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
10167            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10168             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
10169  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
10170            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10171             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
10172  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
10173            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10174             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
10175  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
10176            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10177             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
10178
10179  // Zero-masking
10180  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
10181                           _.ImmAllZerosV)),
10182            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10183             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
10184  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
10185                           _.ImmAllZerosV)),
10186            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10187             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
10188  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
10189                           _.ImmAllZerosV)),
10190            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10191             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
10192  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
10193                           _.ImmAllZerosV)),
10194            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10195             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
10196  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
10197                           _.ImmAllZerosV)),
10198            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10199             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
10200
10201  // Load
10202  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
10203            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10204             addr:$src, (i32 0x9))>;
10205  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
10206            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10207             addr:$src, (i32 0xC))>;
10208  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
10209            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10210             addr:$src, (i32 0xA))>;
10211  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
10212            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10213             addr:$src, (i32 0x4))>;
10214  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
10215            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10216             addr:$src, (i32 0xB))>;
10217
10218  // Merge-masking + load
10219  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
10220                           _.RC:$dst)),
10221            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10222             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10223  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
10224                           _.RC:$dst)),
10225            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10226             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10227  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
10228                           _.RC:$dst)),
10229            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10230             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10231  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
10232                           _.RC:$dst)),
10233            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10234             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10235  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
10236                           _.RC:$dst)),
10237            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10238             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10239
10240  // Zero-masking + load
10241  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
10242                           _.ImmAllZerosV)),
10243            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10244             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10245  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
10246                           _.ImmAllZerosV)),
10247            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10248             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10249  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
10250                           _.ImmAllZerosV)),
10251            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10252             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10253  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
10254                           _.ImmAllZerosV)),
10255            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10256             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10257  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
10258                           _.ImmAllZerosV)),
10259            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10260             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10261
10262  // Broadcast load
10263  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10264            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10265             addr:$src, (i32 0x9))>;
10266  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10267            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10268             addr:$src, (i32 0xC))>;
10269  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10270            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10271             addr:$src, (i32 0xA))>;
10272  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10273            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10274             addr:$src, (i32 0x4))>;
10275  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10276            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10277             addr:$src, (i32 0xB))>;
10278
10279  // Merge-masking + broadcast load
10280  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10281                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10282                           _.RC:$dst)),
10283            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10284             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10285  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10286                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10287                           _.RC:$dst)),
10288            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10289             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10290  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10291                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10292                           _.RC:$dst)),
10293            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10294             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10295  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10296                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10297                           _.RC:$dst)),
10298            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10299             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10300  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10301                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10302                           _.RC:$dst)),
10303            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10304             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10305
10306  // Zero-masking + broadcast load
10307  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10308                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10309                           _.ImmAllZerosV)),
10310            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10311             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10312  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10313                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10314                           _.ImmAllZerosV)),
10315            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10316             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10317  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10318                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10319                           _.ImmAllZerosV)),
10320            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10321             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10322  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10323                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10324                           _.ImmAllZerosV)),
10325            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10326             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10327  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10328                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10329                           _.ImmAllZerosV)),
10330            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10331             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10332}
10333
// Instantiate the ffloor/fceil/ftrunc/frint/fnearbyint -> VRNDSCALE lowering
// patterns. The 512-bit forms only need AVX512F.
let Predicates = [HasAVX512] in {
  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
  defm : AVX512_rndscale_lowering<v8f64_info,  "PD">;
}

// The 256-bit and 128-bit forms additionally require VLX.
let Predicates = [HasVLX] in {
  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
}
10345
// Shuffle of 128-bit lanes (vshuff32x4/vshuff64x2/vshufi32x4/vshufi64x2).
// The X86Shuf128 node is matched in CastInfo's type and the result is
// bitconverted back to the instruction's own type _.VT.  EVEX2VEXOvrd names
// the VEX-encoded instruction the EVEX->VEX compression pass may use for
// this form (an empty string disables the override).
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Register-memory form: the second source is a full-width load.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Broadcast-memory form (EVEX.b): the second source is a scalar load
  // broadcast to all elements. No EVEX2VEX override - VEX has no broadcast.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
10382
// Instantiate the 512-bit (AVX512F) and 256-bit (VLX) lane-shuffle forms.
// There is no 128-bit form: a single 128-bit lane has nothing to shuffle.
// The 512-bit form passes an empty EVEX2VEXOverride string, so only the
// 256-bit form is eligible for EVEX->VEX compression.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}
10396
// 128-bit-lane shuffles. The "F" variants are FP-domain and may compress to
// VPERM2F128; the "I" variants are integer-domain and may compress to
// VPERM2I128. The 32x4 variants pass a 64-bit CastInfo, so the shuffle node
// is matched at 64-bit element granularity.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
                                          avx512vl_f32_info, avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
                                          avx512vl_f64_info, avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
                                          avx512vl_i32_info, avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
                                          avx512vl_i64_info, avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10405
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
// Each pattern widens the 128-bit source into a 512-bit register (via
// INSERT_SUBREG into an IMPLICIT_DEF) and uses a lane shuffle with
// immediate 0, which selects lane 0 of a source for every result lane,
// replicating the subvector across the full register.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

// i16/i8 element types have no dedicated lane shuffle; reuse the i32 one,
// since a whole-lane shuffle is element-size agnostic.
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
10438
// VALIGND/VALIGNQ: concatenated element-wise alignment (X86VAlign node).
// Defines the rri, rmi and rmbi forms with full masking support via
// AVX512_maskable. Register and full-load forms may EVEX->VEX compress
// to VPALIGNR.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  // Register-memory form (full-width load).
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  // Broadcast-memory form (EVEX.b); no VEX equivalent, so no override.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                              (i8 imm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
10468
// Instantiate VALIGND/VALIGNQ at all three vector widths: 512-bit with
// AVX512F, 128/256-bit with VLX.
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10484
// Dword and qword element alignment; VALIGNQ carries VEX_W for the 64-bit
// element size.
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

// Byte-granularity (per-128-bit-lane) alignment, shared 3-operand helper.
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10494
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// Each transform rescales the element-count immediate for the narrower
// element type of the target instruction.
// valignq imm -> valignd imm: one qword is two dwords.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
// valignq imm -> vpalignr imm: one qword is eight bytes.
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
// valignd imm -> vpalignr imm: one dword is four bytes.
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
10506
// Lower a masked select whose value operand is an alignment node produced in
// the From type (e.g. v8i64 valignq) but masked in the To type (e.g. v16i32),
// to the To-type instruction with the immediate rescaled by ImmXForm. Covers
// merge-masked and zero-masked, register and full-load variants.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // Register form, merge masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Register form, zero masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  // Load form, merge masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Load form, zero masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}
10548
// As avx512_vpalign_mask_lowering, plus broadcast-load variants (valid when
// the target instruction supports a To-type broadcast memory operand).
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked broadcast form. Note From.RC and To.RC are the same physical
  // register class at all instantiation sites, so $src1 can be used as-is.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Broadcast form, merge masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  // Broadcast form, zero masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}
10585
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // VPALIGNR has no broadcast memory form, so the non-_mb multiclass is used.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

// Double-block packed sum of absolute differences; never EVEX->VEX
// compressed (marked NotEVEX2VEXConvertible).
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10615
// Unary vector op: register (rr) and full-width memory (rm) forms with
// masking support via AVX512_maskable.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}
10633
// Extends avx512_unary_rm with the broadcast-memory (rmb, EVEX.b) form.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
10646
// Instantiate a unary op at all three widths: 512-bit under prd, 128/256-bit
// under prd + VLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                              EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                              EVEX_V128;
  }
}
10661
// Same as avx512_unary_rm_vl but with broadcast-memory forms included.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                              EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}
10676
// Dword/qword element-size pair of a unary op. Broadcast forms are valid for
// 32/64-bit elements; the Q variant carries VEX_W.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
10685
// Byte/word element-size pair of a unary op. No broadcast forms (EVEX.b is
// not defined for 8/16-bit elements); VEX.W is ignored (VEX_WIG).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
10694
// All four element sizes of a unary op: d/q need AVX512F, b/w need BWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}
10704
// Packed absolute value for all element sizes.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the source into a zmm via INSERT_SUBREG into an IMPLICIT_DEF, run
// the 512-bit VPABSQ, and extract the original-width subregister.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
10721
// Use 512bit version to implement 128/256 bit.
// Generic form of the widen-execute-extract lowering above, for any unary
// op whose narrow forms are unavailable without VLX.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}
10743
// Count leading zeros (CDI).
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10765
10766//===---------------------------------------------------------------------===//
10767// Replicate Single FP - MOVSHDUP and MOVSLDUP
10768//===---------------------------------------------------------------------===//
10769
10770multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10771                            X86SchedWriteWidths sched> {
10772  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10773                                      avx512vl_f32_info, HasAVX512>, XS;
10774}
10775
10776defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10777                                  SchedWriteFShuffle>;
10778defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10779                                  SchedWriteFShuffle>;
10780
10781//===----------------------------------------------------------------------===//
10782// AVX-512 - MOVDDUP
10783//===----------------------------------------------------------------------===//
10784
// 128-bit MOVDDUP. The memory form only loads a single scalar (note
// _.ScalarLdFrag wrapped in scalar_to_vector, and the CD8VH half-vector
// tuple), since only the low 64 bits are read before duplication.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (OpNode (_.VT (scalar_to_vector
                                       (_.ScalarLdFrag addr:$src)))))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}
10800
// MOVDDUP at all widths. The 512/256-bit forms match X86Movddup; the 128-bit
// form matches X86VBroadcast (duplicating the single low element is a
// broadcast at this width).
// NOTE(review): the OpNode parameter is not referenced in this body - the
// nodes are hardcoded per width; consider dropping the parameter upstream.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10813
// Top-level VMOVDDUP wrapper: f64 vector types only, XD prefix, REX.W set.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                        avx512vl_f64_info>, XD, VEX_W;
}
10819
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;

// Select the 128-bit VMOVDDUP for v2f64 broadcasts of a 64-bit value,
// whether the source is a register, a scalar load, or a full-vector load
// (of which only the low element matters for a broadcast).
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;

// Merge-masked and zero-masked register broadcasts.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

// Masked variants of the scalar-load broadcast.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

// Masked variants of the full-vector-load broadcast.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
10852
10853//===----------------------------------------------------------------------===//
10854// AVX-512 - Unpack Instructions
10855//===----------------------------------------------------------------------===//
10856
// Packed floating-point unpack high/low.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

// Integer unpacks. Byte/word element forms require AVX512BW.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

// Dword/qword element forms only need base AVX512F.
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
10879
10880//===----------------------------------------------------------------------===//
10881// AVX-512 - Extract & Insert Integer Instructions
10882//===----------------------------------------------------------------------===//
10883
// Shared store ("mr") form for VPEXTRB/VPEXTRW: OpNode extracts the element
// into a GPR-width value, which is truncated back to element size and stored.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
10893
// VPEXTRB: register form (opcode 0x14, TAPD) extracting a byte into a
// 32/64-bit GPR, plus the store form from avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
10906
// VPEXTRW: the pattern-bearing register form uses the 0xC5/PD encoding; the
// alternate 0x15/TAPD MRMDestReg encoding is emitted as a codegen-only
// "_REV" form for the disassembler, tied to rr via FoldGenData. The store
// form comes from avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
10926
// VPEXTRD/VPEXTRQ (require DQI): extract a dword/qword element into the GPR
// class GRC, either to register or straight to memory.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                            RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}
10946
// Element-extract instantiations; the qword form is distinguished by VEX_W.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10951
// Shared load ("rm") form of the element-insert instructions: inserts an
// element loaded through LdFrag into $src1 at immediate position $src3.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
10961
// VPINSRB/VPINSRW (require BWI): insert from a 32/64-bit GPR or from memory.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
10975
// VPINSRD/VPINSRQ (require DQI): insert a dword/qword from GPR class GRC or
// from memory, matched with the generic insertelt node.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}
10990
// Element-insert instantiations. VPINSRD and VPINSRQ share opcode 0x22 and
// are distinguished by VEX_W on the qword form.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10997
10998//===----------------------------------------------------------------------===//
10999// VSHUFPS - VSHUFPD Operations
11000//===----------------------------------------------------------------------===//
11001
// VSHUFPS/VSHUFPD: three-operand shuffle with an 8-bit immediate, built on
// avx512_common_3Op_imm8 over the FP type info. (The integer VTInfo_I
// parameter is currently unused here.)
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}
11009
// VSHUFPD gets REX.W for the 64-bit element size.
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11012
11013//===----------------------------------------------------------------------===//
11014// AVX-512 - Byte shift Left/Right
11015//===----------------------------------------------------------------------===//
11016
11017// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Byte-shift of a whole vector register by an immediate (VPSLLDQ/VPSRLDQ).
// The reg and mem forms use different ModRM formats (MRMr/MRMm) because the
// opcode extension lives in the reg field.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 imm:$src2))))]>,
           Sched<[sched.Folded, ReadAfterLd]>;
}
11034
// Instantiate the byte-shift at 512/256/128 bits; narrow forms need VLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
// VPSLLDQ/VPSRLDQ share opcode 0x73; /7 vs /3 in ModRM.reg selects the form.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11054
// VPSADBW: note the destination and source use different type infos
// (i64-element result computed from i8-element sources), so both are
// threaded through as _dst and _src.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, ReadAfterLd]>;
}
11074
// Instantiate VPSADBW at all widths (i64 dst / i8 src pairs per width);
// narrow forms need VLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}
11088
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11091
11092// Transforms to swizzle an immediate to enable better matching when
11093// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Immediate bit i encodes the result for inputs (op0,op1,op2) = bits of i;
  // swapping op0/op2 exchanges the 4s and 1s bits of each index.
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // (Bits 2/4 and 3/5 are the index pairs that differ only in the op0/op1
  // positions; the previous comment incorrectly said operand 1 and 2.)
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6 (index pairs differing only in the op1/op2 bits).
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
11153
// VPTERNLOG: three-input bitwise logic selected by the 8-bit immediate.
// Because $src1 is tied to $dst, the instruction forms can only fold a load
// or broadcast into operand 3 and can only take the masking passthru from
// operand 1. The large set of extra patterns below therefore commutes the
// DAG operands into those positions, rewriting the immediate with the
// VPTERNLOG*_imm8 transforms defined above so the logic result is unchanged.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
11322
// Instantiate VPTERNLOG (opcode 0x25) at all widths; 128/256-bit need VLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}
11335
// Dword and qword element flavors (qword carries REX.W).
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
11340
11341// Patterns to implement vnot using vpternlog instead of creating all ones
11342// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11343// so that the result is only dependent on src0. But we use the same source
11344// for all operands to prevent a false dependency.
11345// TODO: We should maybe have a more generalized algorithm for folding to
11346// vpternlog.
// 512-bit vnot: available with base AVX512F.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// Without VLX, widen the 128/256-bit operand to 512 bits, run the 512-bit
// vpternlog, and extract the low subregister back out.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

// With VLX the narrow vpternlog forms can be used directly.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
11375
11376//===----------------------------------------------------------------------===//
11377// AVX-512 - FixupImm
11378//===----------------------------------------------------------------------===//
11379
// VFIXUPIMM packed forms. The table operand ($src3) uses a separate type
// info (TblVT) from the result/source type; the rounding-mode argument is
// pinned to FROUND_CURRENT (the SAE form is defined separately below).
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    // Full-vector memory form for the table operand.
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      Sched<[sched.Folded, ReadAfterLd]>;
    // Broadcast-from-memory form (EVEX.b).
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                    EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
11413
// Packed VFIXUPIMM with {sae} (suppress-all-exceptions): register-only form,
// selected via the EVEX.b bit on a register operand (FROUND_NO_EXC).
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
  }
}
11430
// Scalar VFIXUPIMM (SS/SD): register, register+{sae}, and scalar-memory
// forms. $src1 is tied to $dst; _src3VT is the integer vector type of the
// third (table) operand.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    // Register form (current rounding mode).
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>, Sched<[sched]>;
    // Register form with {sae}. This is a register-only encoding, so it uses
    // the plain scheduling class, not the folded-load one (which would model
    // a load that does not exist; the packed SAE form above does the same).
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
    // Scalar-memory form: the table element is loaded and scalar_to_vector'd.
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (OpNode (_.VT _.RC:$src1),
                             (_.VT _.RC:$src2),
                             (_src3VT.VT (scalar_to_vector
                                       (_src3VT.ScalarLdFrag addr:$src3))),
                             (i32 imm:$src4),
                             (i32 FROUND_CURRENT))>,
                     Sched<[sched.Folded, ReadAfterLd]>;
  }
}
11466
// Instantiates the packed VFIXUPIMM forms for all vector widths: 512-bit
// (plus the {sae} variant) under AVX512, 128/256-bit under VLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                       _Vec.info512, _Tbl.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}
11485
// VFIXUPIMM instantiations: scalar SS/SD (opcode 0x55) and packed PS/PD
// (opcode 0x54, all widths). The table operand is an integer vector of the
// matching element width.
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11496
11497// Patterns used to select SSE scalar fp arithmetic instructions from
11498// either:
11499//
11500// (1) a scalar fp operation followed by a blend
11501//
11502// The effect is that the backend no longer emits unnecessary vector
11503// insert instructions immediately after SSE scalar fp instructions
11504// like addss or mulss.
11505//
11506// For example, given the following code:
11507//   __m128 foo(__m128 A, __m128 B) {
11508//     A[0] += B[0];
11509//     return A;
11510//   }
11511//
11512// Previously we generated:
11513//   addss %xmm0, %xmm1
11514//   movss %xmm1, %xmm0
11515//
11516// We now generate:
11517//   addss %xmm1, %xmm0
11518//
11519// (2) a vector packed single/double fp operation followed by a vector insert
11520//
11521// The effect is that the backend converts the packed fp instruction
11522// followed by a vector insert into a single SSE scalar fp instruction.
11523//
11524// For example, given the following code:
11525//   __m128 foo(__m128 A, __m128 B) {
11526//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
11528//   }
11529//
11530// Previously we generated:
11531//   addps %xmm0, %xmm1
11532//   movss %xmm1, %xmm0
11533//
11534// We now generate:
11535//   addss %xmm1, %xmm0
11536
11537// TODO: Some canonicalization in lowering would simplify the number of
11538// patterns we have to try to match.
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Selects the intrinsic (_Int) forms of the AVX-512 scalar FP arithmetic
// instructions for "scalar op + movss/movsd blend" DAGs, including the
// merge-masked (k) and zero-masked (kz) variants via X86selects.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;

    // extracted masked scalar math op with insert via movss, zero masking
    // (the false value is +0.0, so use the Intkz zero-masked instruction).
    // Use !cast<Instruction> like the sibling patterns above for consistency.
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  }
}
11576
// Instantiate the scalar-math blend patterns for the four basic FP ops,
// in both f32 (movss) and f64 (movsd) flavors.
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11586
// Same idea for unary scalar FP ops (e.g. sqrt): match "unary op on element 0
// + move-blend" to the intrinsic form of the scalar instruction.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11598
// Unary scalar ops that map to an instruction taking an immediate:
// floor/ceil select VRNDSCALESS/SD with the rounding-mode immediate ImmV
// (0x01 = round down / floor, 0x02 = round up / ceil).
multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
                                                 SDNode Move, X86VectorVTInfo _,
                                                 bits<8> ImmV> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
                                                        (i32 ImmV))>;
  }
}

defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x02>;
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x02>;
11618
11619//===----------------------------------------------------------------------===//
11620// AES instructions
11621//===----------------------------------------------------------------------===//
11622
// EVEX-encoded AES: reuses the legacy AESI_binop_rm_int multiclass with
// AVX-512 registers. 128/256-bit forms require VAES+VLX; the 512-bit form
// requires VAES+AVX512F. IntPrefix is the base intrinsic name; the wider
// forms append "_256"/"_512".
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
    }
    let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11645
11646//===----------------------------------------------------------------------===//
11647// PCLMUL instructions - Carry less multiplication
11648//===----------------------------------------------------------------------===//
11649
// EVEX-encoded carry-less multiply: 512-bit under VPCLMULQDQ+AVX512F,
// 128/256-bit under VPCLMULQDQ+VLX. Reuses the legacy vpclmulqdq multiclass.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases such as vpclmullqlqdq that encode the immediate in the mnemonic.
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11667
11668//===----------------------------------------------------------------------===//
11669// VBMI2
11670//===----------------------------------------------------------------------===//
11671
// VBMI2 variable funnel shifts (VPSHLDV/VPSHRDV): register and full-vector
// memory forms. Three-source, with $src1 tied to $dst.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
11690
// Adds the broadcast-memory form on top of VBMI2_shift_var_rm. Only the
// d/q element widths get this (word-sized ops cannot broadcast).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
11705
// Width expansion for the non-broadcast variable-shift forms:
// 512-bit under VBMI2, 128/256-bit under VBMI2+VLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}
11718
// Width expansion for the broadcast-capable variable-shift forms (d/q).
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// Instantiates the w/d/q element-width variants of a variable funnel shift.
// Words use wOp and get no broadcast form; dwords/qwords share dqOp.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
11740
// Immediate funnel shifts (VPSHLD/VPSHRD): same w/d/q layout, using the
// generic 3-op-with-imm8 helpers.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
11751
// Concat & Shift (VBMI2 funnel shifts: variable-count and immediate forms).
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress (VBMI2 byte/word compress; marked NotMemoryFoldable because the
// memory form has different masking semantics than a folded store).
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand (VBMI2 byte/word expand).
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11770
11771//===----------------------------------------------------------------------===//
11772// VNNI
11773//===----------------------------------------------------------------------===//
11774
// VNNI dot-product accumulate: register, full-vector memory, and
// 32-bit-element broadcast forms. Three-source accumulator style,
// $src1 tied to $dst.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3))>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (bitconvert
                                                     (VTI.LdFrag addr:$src3)))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, ReadAfterLd]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (X86VBroadcast
                                             (VTI.ScalarLdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, ReadAfterLd]>;
}
11802
// Width expansion for VNNI (all element types are i32 accumulators):
// 512-bit under VNNI, 128/256-bit under VNNI+VLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
11818
11819//===----------------------------------------------------------------------===//
11820// Bit Algorithms
11821//===----------------------------------------------------------------------===//
11822
// BITALG byte/word population count, plus lowering patterns that widen
// narrower vectors to a supported width.
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11831
// VPSHUFBITQMB: bit-shuffle producing a mask register result (KRC dest),
// register and memory source forms.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, ReadAfterLd]>;
}
11849
// Width expansion: 512-bit under BITALG, 128/256-bit under BITALG+VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11861
11862//===----------------------------------------------------------------------===//
11863// GFNI
11864//===----------------------------------------------------------------------===//
11865
// GFNI GF(2^8) multiply, EVEX-encoded: 512-bit under GFNI+AVX512BW,
// 128/256-bit under GFNI+VLX+BWI.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
11882
// Adds a qword-broadcast form (the affine matrix is a 64-bit element,
// BcstVTI) on top of the generic reg/mem-with-imm8 forms.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, ReadAfterLd]>;
}
11897
// Width expansion for the GFNI affine transforms; byte-element data with
// v*i64 broadcast types for the matrix operand.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
11917
11918
11919//===----------------------------------------------------------------------===//
11920// AVX5124FMAPS
11921//===----------------------------------------------------------------------===//
11922
// AVX5124FMAPS (Knights Mill): 4-iteration FMA reading a block of 4
// consecutive registers plus a 128-bit memory operand. Defined asm-only
// (empty patterns); selection is done via intrinsics elsewhere.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
11949
11950//===----------------------------------------------------------------------===//
11951// AVX5124VNNIW
11952//===----------------------------------------------------------------------===//
11953
// AVX5124VNNIW (Knights Mill): 4-iteration word dot-product accumulate,
// same multi-register-block + memory shape as 4FMAPS. Asm-only definitions.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}
11968
11969