; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi-builtins.c

define <8 x i64> @test_mm512_mask2_permutex2var_epi8(<8 x i64> %__A, <8 x i64> %__I, i64 %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0
; X64-NEXT:    retq
entry:
  ; mask2 variant: masked-off lanes take the index operand %__I (the select
  ; falls back to %1), which is what forces the vpermi2b (index-destructive) form.
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast i64 %__U to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %3, <64 x i8> %1
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_permutex2var_epi8(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  ; Unmasked variant: no select, so the backend is free to pick the
  ; vpermt2b (table-destructive) form that keeps the result in zmm0.
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast <64 x i8> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_permutex2var_epi8(<8 x i64> %__A, i64 %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  ; mask variant: masked-off lanes take the first source %__A (the select
  ; falls back to %0), matching the merge-masked vpermt2b form.
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast i64 %__U to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %3, <64 x i8> %0
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_permutex2var_epi8(i64 %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpermt2b %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  ; maskz variant: masked-off lanes become zero (select against
  ; zeroinitializer), matching the zero-masked {z} vpermt2b form.
  %0 = bitcast <8 x i64> %__A to <64 x i8>
  %1 = bitcast <8 x i64> %__I to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %0, <64 x i8> %1, <64 x i8> %2)
  %4 = bitcast i64 %__U to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %3, <64 x i8> zeroinitializer
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>)

define <8 x i64> @test_mm512_mask_multishift_epi64_epi8(<8 x i64> %__W, i64 %__M, <8 x i64> %__X, <8 x i64> %__Y) {
; X86-LABEL: test_mm512_mask_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpmultishiftqb %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpmultishiftqb %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  ; Merge-masked multishift: masked-off lanes take the passthrough %__W (%3).
  %0 = bitcast <8 x i64> %__X to <64 x i8>
  %1 = bitcast <8 x i64> %__Y to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %0, <64 x i8> %1)
  %3 = bitcast <8 x i64> %__W to <64 x i8>
  %4 = bitcast i64 %__M to <64 x i1>
  %5 = select <64 x i1> %4, <64 x i8> %2, <64 x i8> %3
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_multishift_epi64_epi8(i64 %__M, <8 x i64> %__X, <8 x i64> %__Y) {
; X86-LABEL: test_mm512_maskz_multishift_epi64_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_multishift_epi64_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  ; Zero-masked multishift: masked-off lanes become zero ({z} form).
  %0 = bitcast <8 x i64> %__X to <64 x i8>
  %1 = bitcast <8 x i64> %__Y to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %0, <64 x i8> %1)
  %3 = bitcast i64 %__M to <64 x i1>
  %4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer
  %5 = bitcast <64 x i8> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_multishift_epi64_epi8(<8 x i64> %__X, <8 x i64> %__Y) {
; CHECK-LABEL: test_mm512_multishift_epi64_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmultishiftqb %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  ; Unmasked multishift: plain intrinsic call, no select.
  %0 = bitcast <8 x i64> %__X to <64 x i8>
  %1 = bitcast <8 x i64> %__Y to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %0, <64 x i8> %1)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

declare <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8>, <64 x i8>)
