1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=KNL
3; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
4; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
5
; test1: selects between %x and %y (16 x float) using an `fcmp ole` mask.
; All RUN configurations expect a compare that writes %k1 followed by a
; mask-controlled blend of the two zmm inputs.
6define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
7; CHECK-LABEL: test1:
8; CHECK:       ## %bb.0:
9; CHECK-NEXT:    vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
10; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
11; CHECK-NEXT:    retq ## encoding: [0xc3]
12  %mask = fcmp ole <16 x float> %x, %y
13  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
14  ret <16 x float> %max
15}
16
; test2: double-precision counterpart of the `fcmp ole` + select pattern on
; 8 x double. All RUN configurations expect vcmplepd into %k1 and a masked
; vblendmpd.
17define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
18; CHECK-LABEL: test2:
19; CHECK:       ## %bb.0:
20; CHECK-NEXT:    vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
21; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
22; CHECK-NEXT:    retq ## encoding: [0xc3]
23  %mask = fcmp ole <8 x double> %x, %y
24  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
25  ret <8 x double> %max
26}
27
; test3: `icmp eq` of %x against a 16 x i32 vector loaded from %yp with only
; 4-byte alignment. The expected code uses the load directly as the memory
; operand of vpcmpeqd, then blends %x and %x1 under the resulting mask.
28define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
29; CHECK-LABEL: test3:
30; CHECK:       ## %bb.0:
31; CHECK-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
32; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
33; CHECK-NEXT:    retq ## encoding: [0xc3]
34  %y = load <16 x i32>, <16 x i32>* %yp, align 4
35  %mask = icmp eq <16 x i32> %x, %y
36  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
37  ret <16 x i32> %max
38}
39
; test4_unsigned: unsigned `icmp uge` on 16 x i32 selecting %x1 where true and
; %y otherwise. The expected compare is the unsigned "not less than" form
; (vpcmpnltud, immediate 0x05).
40define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
41; CHECK-LABEL: test4_unsigned:
42; CHECK:       ## %bb.0:
43; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
44; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
45; CHECK-NEXT:    retq ## encoding: [0xc3]
46  %mask = icmp uge <16 x i32> %x, %y
47  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
48  ret <16 x i32> %max
49}
50
; test5: `icmp eq` on 8 x i64 followed by a select between the compared
; operands. All RUN configurations expect vpcmpeqq into %k1 and a masked
; vpblendmq.
51define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
52; CHECK-LABEL: test5:
53; CHECK:       ## %bb.0:
54; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
55; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
56; CHECK-NEXT:    retq ## encoding: [0xc3]
57  %mask = icmp eq <8 x i64> %x, %y
58  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
59  ret <8 x i64> %max
60}
61
; test6_unsigned: unsigned `icmp ugt` on 8 x i64 selecting %x1 where true and
; %y otherwise. The expected compare is the unsigned "not less or equal" form
; (vpcmpnleuq, immediate 0x06).
62define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
63; CHECK-LABEL: test6_unsigned:
64; CHECK:       ## %bb.0:
65; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
66; CHECK-NEXT:    vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
67; CHECK-NEXT:    retq ## encoding: [0xc3]
68  %mask = icmp ugt <8 x i64> %x, %y
69  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
70  ret <8 x i64> %max
71}
72
; test7: 128-bit `fcmp olt` of %a against zero, selecting %a or %b.
; The two RUN lines without avx512vl expect the xmm inputs to be treated as
; zmm registers for the compare and blend, ending with vzeroupper; the SKX
; run expects the compare and blend to stay at xmm width.
73define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
74; AVX512-LABEL: test7:
75; AVX512:       ## %bb.0:
76; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
77; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
78; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
79; AVX512-NEXT:    vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
80; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
81; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
82; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
83; AVX512-NEXT:    retq ## encoding: [0xc3]
84;
85; SKX-LABEL: test7:
86; SKX:       ## %bb.0:
87; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
88; SKX-NEXT:    vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
89; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
90; SKX-NEXT:    retq ## encoding: [0xc3]
91
92  %mask = fcmp olt <4 x float> %a, zeroinitializer
93  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
94  ret <4 x float>%c
95}
96
; test8: 128-bit `fcmp olt` of a 2 x double value against zero, selecting %a
; or %b. Without avx512vl the expected code operates on zmm registers and ends
; with vzeroupper; the SKX run expects xmm-width vcmpltpd / vblendmpd.
97define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
98; AVX512-LABEL: test8:
99; AVX512:       ## %bb.0:
100; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
101; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
102; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
103; AVX512-NEXT:    vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
104; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
105; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
106; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
107; AVX512-NEXT:    retq ## encoding: [0xc3]
108;
109; SKX-LABEL: test8:
110; SKX:       ## %bb.0:
111; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x57,0xd2]
112; SKX-NEXT:    vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
113; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
114; SKX-NEXT:    retq ## encoding: [0xc3]
115  %mask = fcmp olt <2 x double> %a, zeroinitializer
116  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
117  ret <2 x double>%c
118}
119
; test9: 256-bit `icmp eq` on 8 x i32 followed by a select between the
; compared operands. Without avx512vl the expected code performs the compare
; and blend on zmm registers; the SKX run expects ymm-width instructions.
120define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
121; AVX512-LABEL: test9:
122; AVX512:       ## %bb.0:
123; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
124; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
125; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
126; AVX512-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
127; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
128; AVX512-NEXT:    retq ## encoding: [0xc3]
129;
130; SKX-LABEL: test9:
131; SKX:       ## %bb.0:
132; SKX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
133; SKX-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
134; SKX-NEXT:    retq ## encoding: [0xc3]
135  %mask = icmp eq <8 x i32> %x, %y
136  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
137  ret <8 x i32> %max
138}
139
; test10: 256-bit `fcmp oeq` on 8 x float followed by a select between the
; compared operands. Without avx512vl the expected code uses zmm registers;
; the SKX run expects ymm-width vcmpeqps / vblendmps.
140define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
141; AVX512-LABEL: test10:
142; AVX512:       ## %bb.0:
143; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
144; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
145; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
146; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
147; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
148; AVX512-NEXT:    retq ## encoding: [0xc3]
149;
150; SKX-LABEL: test10:
151; SKX:       ## %bb.0:
152; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
153; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
154; SKX-NEXT:    retq ## encoding: [0xc3]
155
156  %mask = fcmp oeq <8 x float> %x, %y
157  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
158  ret <8 x float> %max
159}
160
; test11_unsigned: `icmp ugt` on 8 x i32 followed by a select of the same two
; operands, i.e. an unsigned maximum. Every RUN configuration expects this to
; become a single vpmaxud with no mask register involved.
161define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
162; AVX512-LABEL: test11_unsigned:
163; AVX512:       ## %bb.0:
164; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
165; AVX512-NEXT:    retq ## encoding: [0xc3]
166;
167; SKX-LABEL: test11_unsigned:
168; SKX:       ## %bb.0:
169; SKX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
170; SKX-NEXT:    retq ## encoding: [0xc3]
171  %mask = icmp ugt <8 x i32> %x, %y
172  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
173  ret <8 x i32> %max
174}
175
; test12: `icmp eq` on 16 x i64 (two zmm registers per operand) with the
; 16 x i1 result bitcast to i16. The expected code compares each half into
; its own mask register, joins them with kunpckbw, and moves the mask to
; %eax (kmovw on the KNL run, kmovd on the runs that enable avx512bw).
176define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
177; KNL-LABEL: test12:
178; KNL:       ## %bb.0:
179; KNL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
180; KNL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
181; KNL-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
182; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
183; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
184; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
185; KNL-NEXT:    retq ## encoding: [0xc3]
186;
187; AVX512BW-LABEL: test12:
188; AVX512BW:       ## %bb.0:
189; AVX512BW-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
190; AVX512BW-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
191; AVX512BW-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
192; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
193; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
194; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
195; AVX512BW-NEXT:    retq ## encoding: [0xc3]
196;
197; SKX-LABEL: test12:
198; SKX:       ## %bb.0:
199; SKX-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
200; SKX-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
201; SKX-NEXT:    kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
202; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
203; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
204; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
205; SKX-NEXT:    retq ## encoding: [0xc3]
206  %res = icmp eq <16 x i64> %a, %b
207  %res1 = bitcast <16 x i1> %res to i16
208  ret i16 %res1
209}
210
; test12_v32i32: `icmp eq` on 32 x i32 with the 32 x i1 result bitcast to i32.
; The KNL run expects each 16-lane mask to be moved to a general-purpose
; register and combined there with shll/orl. The runs that enable avx512bw
; expect the two masks to be joined with kunpckwd and moved out with kmovd.
211define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
212; KNL-LABEL: test12_v32i32:
213; KNL:       ## %bb.0:
214; KNL-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
215; KNL-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
216; KNL-NEXT:    vpcmpeqd %zmm3, %zmm1, %k0 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xc3]
217; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
218; KNL-NEXT:    shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
219; KNL-NEXT:    orl %ecx, %eax ## encoding: [0x09,0xc8]
220; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
221; KNL-NEXT:    retq ## encoding: [0xc3]
222;
223; AVX512BW-LABEL: test12_v32i32:
224; AVX512BW:       ## %bb.0:
225; AVX512BW-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
226; AVX512BW-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
227; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
228; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
229; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
230; AVX512BW-NEXT:    retq ## encoding: [0xc3]
231;
232; SKX-LABEL: test12_v32i32:
233; SKX:       ## %bb.0:
234; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
235; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
236; SKX-NEXT:    kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
237; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
238; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
239; SKX-NEXT:    retq ## encoding: [0xc3]
240  %res = icmp eq <32 x i32> %a, %b
241  %res1 = bitcast <32 x i1> %res to i32
242  ret i32 %res1
243}
244
; test12_v64i16: `icmp eq` on 64 x i16 with the 64 x i1 result bitcast to i64.
; The KNL run (avx512f only) expects four ymm-width vpcmpeqw compares, each
; sign-extended to dwords and converted to a 16-bit mask with vptestmd; the
; four masks are then assembled into %rax with shifts and ors.
; The runs that enable avx512bw expect two zmm-width vpcmpeqw compares into
; mask registers, joined with kunpckdq and moved out with kmovq.
245define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
246; KNL-LABEL: test12_v64i16:
247; KNL:       ## %bb.0:
248; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm4 ## encoding: [0xc5,0xfd,0x75,0xe2]
249; KNL-NEXT:    vpmovsxwd %ymm4, %zmm4 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xe4]
250; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k0 ## encoding: [0x62,0xf2,0x5d,0x48,0x27,0xc4]
251; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
252; KNL-NEXT:    vextracti64x4 $1, %zmm2, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd2,0x01]
253; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
254; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x75,0xc2]
255; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
256; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
257; KNL-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
258; KNL-NEXT:    shll $16, %ecx ## encoding: [0xc1,0xe1,0x10]
259; KNL-NEXT:    orl %eax, %ecx ## encoding: [0x09,0xc1]
260; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc3]
261; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
262; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
263; KNL-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
264; KNL-NEXT:    vextracti64x4 $1, %zmm3, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd8,0x01]
265; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc9,0x01]
266; KNL-NEXT:    vpcmpeqw %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc0]
267; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
268; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
269; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
270; KNL-NEXT:    shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
271; KNL-NEXT:    orl %edx, %eax ## encoding: [0x09,0xd0]
272; KNL-NEXT:    shlq $32, %rax ## encoding: [0x48,0xc1,0xe0,0x20]
273; KNL-NEXT:    orq %rcx, %rax ## encoding: [0x48,0x09,0xc8]
274; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
275; KNL-NEXT:    retq ## encoding: [0xc3]
276;
277; AVX512BW-LABEL: test12_v64i16:
278; AVX512BW:       ## %bb.0:
279; AVX512BW-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
280; AVX512BW-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
281; AVX512BW-NEXT:    kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
282; AVX512BW-NEXT:    kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
283; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
284; AVX512BW-NEXT:    retq ## encoding: [0xc3]
285;
286; SKX-LABEL: test12_v64i16:
287; SKX:       ## %bb.0:
288; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
289; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
290; SKX-NEXT:    kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
291; SKX-NEXT:    kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
292; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
293; SKX-NEXT:    retq ## encoding: [0xc3]
294  %res = icmp eq <64 x i16> %a, %b
295  %res1 = bitcast <64 x i1> %res to i64
296  ret i64 %res1
297}
298
; test13: `fcmp oeq` on 16 x float with the i1 result zero-extended to i32
; lanes. The first two RUN lines expect the mask to be materialized with a
; zero-masked vpternlogd $255 and reduced to 0/1 by vpsrld $31; the SKX run
; expects vpmovm2d followed by the same shift.
; Note that the opening brace of the body comes after the assertion lines.
299define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
300; AVX512-LABEL: test13:
301; AVX512:       ## %bb.0:
302; AVX512-NEXT:    vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
303; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
304; AVX512-NEXT:    vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
305; AVX512-NEXT:    retq ## encoding: [0xc3]
306;
307; SKX-LABEL: test13:
308; SKX:       ## %bb.0:
309; SKX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc1,0x00]
310; SKX-NEXT:    vpmovm2d %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
311; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
312; SKX-NEXT:    retq ## encoding: [0xc3]
313{
314  %cmpvector_i = fcmp oeq <16 x float> %a, %b
315  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
316  ret <16 x i32> %conv
317}
318
; test14: computes %a - %b and keeps the difference only in lanes where it is
; signed-greater-than %a; all other lanes become zero. The IR expresses this
; through sext + `icmp eq 0` (an inverted mask) and a select that picks zero
; when the inverted mask is true. The expected code is a single compare
; feeding a zero-masking vmovdqa32.
319define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
320; CHECK-LABEL: test14:
321; CHECK:       ## %bb.0:
322; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
323; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
324; CHECK-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
325; CHECK-NEXT:    retq ## encoding: [0xc3]
326  %sub_r = sub <16 x i32> %a, %b
327  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
328  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
329  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
330  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
331  ret <16 x i32>%res
332}
333
; test15: 8 x i64 form of the subtract / signed-greater-than / zero-or-keep
; pattern: lanes where (%a - %b) is signed-greater-than %a keep the
; difference, other lanes become zero. The expected code is vpsubq, vpcmpgtq
; and a zero-masking vmovdqa64.
334define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
335; CHECK-LABEL: test15:
336; CHECK:       ## %bb.0:
337; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
338; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
339; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
340; CHECK-NEXT:    retq ## encoding: [0xc3]
341  %sub_r = sub <8 x i64> %a, %b
342  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
343  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
344  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
345  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
346  ret <8 x i64>%res
347}
348
; test16: signed `icmp sge` on 16 x i32 selecting %x1 where true and %y
; otherwise. The expected compare is the signed "not less than" form
; (vpcmpnltd, immediate 0x05).
349define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
350; CHECK-LABEL: test16:
351; CHECK:       ## %bb.0:
352; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
353; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
354; CHECK-NEXT:    retq ## encoding: [0xc3]
355  %mask = icmp sge <16 x i32> %x, %y
356  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
357  ret <16 x i32> %max
358}
359
; test17: signed `icmp sgt` of %x against a vector loaded from %y.ptr with
; 4-byte alignment. The expected code uses the load as the memory operand of
; vpcmpgtd and blends %x / %x1 under the mask.
360define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
361; CHECK-LABEL: test17:
362; CHECK:       ## %bb.0:
363; CHECK-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
364; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
365; CHECK-NEXT:    retq ## encoding: [0xc3]
366  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
367  %mask = icmp sgt <16 x i32> %x, %y
368  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
369  ret <16 x i32> %max
370}
371
; test18: signed `icmp sle` of %x against a vector loaded from %y.ptr with
; 4-byte alignment. The expected code is vpcmpled (immediate 0x02) with the
; load as its memory operand, followed by a masked blend.
372define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
373; CHECK-LABEL: test18:
374; CHECK:       ## %bb.0:
375; CHECK-NEXT:    vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
376; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
377; CHECK-NEXT:    retq ## encoding: [0xc3]
378  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
379  %mask = icmp sle <16 x i32> %x, %y
380  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
381  ret <16 x i32> %max
382}
383
; test19: unsigned `icmp ule` of %x against a vector loaded from %y.ptr with
; 4-byte alignment. The expected code is vpcmpleud (immediate 0x02) with the
; load as its memory operand, followed by a masked blend.
384define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
385; CHECK-LABEL: test19:
386; CHECK:       ## %bb.0:
387; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
388; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
389; CHECK-NEXT:    retq ## encoding: [0xc3]
390  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
391  %mask = icmp ule <16 x i32> %x, %y
392  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
393  ret <16 x i32> %max
394}
395
; test20: two `icmp eq` masks combined with select(%mask0, %mask1, zero),
; which yields true only where both compares are true. The expected code
; realizes the combination by write-masking the second vpcmpeqd with the
; first compare's result ({%k1}) rather than using a separate mask AND.
396define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
397; CHECK-LABEL: test20:
398; CHECK:       ## %bb.0:
399; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
400; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
401; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
402; CHECK-NEXT:    retq ## encoding: [0xc3]
403  %mask1 = icmp eq <16 x i32> %x1, %y1
404  %mask0 = icmp eq <16 x i32> %x, %y
405  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
406  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
407  ret <16 x i32> %max
408}
409
; test21: combines a signed `icmp sle` (%x, %y) with a signed `icmp sge`
; (%x1, %y1) via select(%mask0, %mask1, zero) on 8 x i64. The expected code
; write-masks the second compare (vpcmpnltq) with the result of the first
; (vpcmpleq), then blends %x and %x1.
410define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
411; CHECK-LABEL: test21:
412; CHECK:       ## %bb.0:
413; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
414; CHECK-NEXT:    vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
415; CHECK-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
416; CHECK-NEXT:    retq ## encoding: [0xc3]
417  %mask1 = icmp sge <8 x i64> %x1, %y1
418  %mask0 = icmp sle <8 x i64> %x, %y
419  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
420  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
421  ret <8 x i64> %max
422}
423
; test22: combines a register-register `icmp sgt` (%x1, %y1) with an
; `icmp sgt` of %x against a vector loaded from %y.ptr (4-byte alignment).
; The expected code does the register compare first, then a write-masked
; vpcmpgtq that takes the load as its memory operand.
424define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
425; CHECK-LABEL: test22:
426; CHECK:       ## %bb.0:
427; CHECK-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
428; CHECK-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
429; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
430; CHECK-NEXT:    retq ## encoding: [0xc3]
431  %mask1 = icmp sgt <8 x i64> %x1, %y1
432  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
433  %mask0 = icmp sgt <8 x i64> %x, %y
434  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
435  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
436  ret <8 x i64> %max
437}
438
; test23: combines a signed `icmp sge` (%x1, %y1) with an unsigned `icmp ule`
; of %x against a vector loaded from %y.ptr (4-byte alignment). The expected
; code is vpcmpnltd followed by a write-masked vpcmpleud with the load as its
; memory operand.
439define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
440; CHECK-LABEL: test23:
441; CHECK:       ## %bb.0:
442; CHECK-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
443; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
444; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
445; CHECK-NEXT:    retq ## encoding: [0xc3]
446  %mask1 = icmp sge <16 x i32> %x1, %y1
447  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
448  %mask0 = icmp ule <16 x i32> %x, %y
449  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
450  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
451  ret <16 x i32> %max
452}
453
; test24: `icmp eq` of %x against a scalar i64 loaded from %yb.ptr and
; splatted to all 8 lanes (insertelement + zero-index shufflevector). The
; expected code uses the embedded-broadcast memory form (%rdi){1to8} on
; vpcmpeqq instead of a separate broadcast instruction.
454define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
455; CHECK-LABEL: test24:
456; CHECK:       ## %bb.0:
457; CHECK-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
458; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
459; CHECK-NEXT:    retq ## encoding: [0xc3]
460  %yb = load i64, i64* %yb.ptr, align 4
461  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
462  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
463  %mask = icmp eq <8 x i64> %x, %y
464  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
465  ret <8 x i64> %max
466}
467
; test25: signed `icmp sle` of %x against a scalar i32 loaded from %yb.ptr
; and splatted to all 16 lanes. The expected code is vpcmpled with the
; embedded-broadcast memory operand (%rdi){1to16}.
468define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
469; CHECK-LABEL: test25:
470; CHECK:       ## %bb.0:
471; CHECK-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
472; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
473; CHECK-NEXT:    retq ## encoding: [0xc3]
474  %yb = load i32, i32* %yb.ptr, align 4
475  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
476  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
477  %mask = icmp sle <16 x i32> %x, %y
478  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
479  ret <16 x i32> %max
480}
481
; test26: combines a signed `icmp sge` (%x1, %y1) with a signed `icmp sgt` of
; %x against a splatted scalar i32 loaded from %yb.ptr. The expected code is
; vpcmpnltd followed by a write-masked vpcmpgtd that uses the
; embedded-broadcast memory operand (%rdi){1to16}.
482define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
483; CHECK-LABEL: test26:
484; CHECK:       ## %bb.0:
485; CHECK-NEXT:    vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
486; CHECK-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
487; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
488; CHECK-NEXT:    retq ## encoding: [0xc3]
489  %mask1 = icmp sge <16 x i32> %x1, %y1
490  %yb = load i32, i32* %yb.ptr, align 4
491  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
492  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
493  %mask0 = icmp sgt <16 x i32> %x, %y
494  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
495  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
496  ret <16 x i32> %max
497}
498
; test27: combines a signed `icmp sge` (%x1, %y1) with a signed `icmp sle` of
; %x against a splatted scalar i64 loaded from %yb.ptr. The expected code is
; vpcmpnltq followed by a write-masked vpcmpleq that uses the
; embedded-broadcast memory operand (%rdi){1to8}.
499define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
500; CHECK-LABEL: test27:
501; CHECK:       ## %bb.0:
502; CHECK-NEXT:    vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
503; CHECK-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
504; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
505; CHECK-NEXT:    retq ## encoding: [0xc3]
506  %mask1 = icmp sge <8 x i64> %x1, %y1
507  %yb = load i64, i64* %yb.ptr, align 4
508  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
509  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
510  %mask0 = icmp sle <8 x i64> %x, %y
511  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
512  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
513  ret <8 x i64> %max
514}
515
; test28: compares two `icmp sgt` result masks for equality (`icmp eq` on
; 8 x i1) and sign-extends the result to 8 x i32. The expected code combines
; the mask registers with an XNOR (kxnorw on the first two RUN lines, kxnorb
; on the SKX run) and then materializes the vector: zero-masked vpternlogd
; $255 on the first two runs, vpmovm2d on the SKX run.
516define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
517; AVX512-LABEL: test28:
518; AVX512:       ## %bb.0:
519; AVX512-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
520; AVX512-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
521; AVX512-NEXT:    kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
522; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
523; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
524; AVX512-NEXT:    retq ## encoding: [0xc3]
525;
526; SKX-LABEL: test28:
527; SKX:       ## %bb.0:
528; SKX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
529; SKX-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
530; SKX-NEXT:    kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
531; SKX-NEXT:    vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
532; SKX-NEXT:    retq ## encoding: [0xc3]
533  %x_gt_y = icmp sgt <8 x i64> %x, %y
534  %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
535  %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
536  %resse = sext <8 x i1>%res to <8 x i32>
537  ret <8 x i32> %resse
538}
539
; test29: compares two `icmp sgt` result masks for inequality (`icmp ne` on
; 16 x i1) and sign-extends the result to 16 x i8. All runs expect the masks
; to be combined with kxorw. The KNL run then materializes dwords with a
; zero-masked vpternlogd and narrows with vpmovdb; the runs that enable
; avx512bw expect vpmovm2b (zmm destination without avx512vl, xmm on SKX).
540define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
541; KNL-LABEL: test29:
542; KNL:       ## %bb.0:
543; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
544; KNL-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
545; KNL-NEXT:    kxorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc9]
546; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
547; KNL-NEXT:    vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
548; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
549; KNL-NEXT:    retq ## encoding: [0xc3]
550;
551; AVX512BW-LABEL: test29:
552; AVX512BW:       ## %bb.0:
553; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
554; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
555; AVX512BW-NEXT:    kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
556; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
557; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
558; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
559; AVX512BW-NEXT:    retq ## encoding: [0xc3]
560;
561; SKX-LABEL: test29:
562; SKX:       ## %bb.0:
563; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
564; SKX-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
565; SKX-NEXT:    kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
566; SKX-NEXT:    vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
567; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
568; SKX-NEXT:    retq ## encoding: [0xc3]
569  %x_gt_y = icmp sgt <16 x i32> %x, %y
570  %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
571  %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
572  %resse = sext <16 x i1>%res to <16 x i8>
573  ret <16 x i8> %resse
574}
575
; test30: 256-bit `fcmp oeq` on 4 x double followed by a select between the
; compared operands. Without avx512vl the expected code uses zmm registers;
; the SKX run expects ymm-width vcmpeqpd / vblendmpd.
576define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
577; AVX512-LABEL: test30:
578; AVX512:       ## %bb.0:
579; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
580; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
581; AVX512-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x00]
582; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
583; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
584; AVX512-NEXT:    retq ## encoding: [0xc3]
585;
586; SKX-LABEL: test30:
587; SKX:       ## %bb.0:
588; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
589; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
590; SKX-NEXT:    retq ## encoding: [0xc3]
591
592  %mask = fcmp oeq <4 x double> %x, %y
593  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
594  ret <4 x double> %max
595}
596
; test31: 128-bit `fcmp olt` of %x against a 2 x double vector loaded from
; %yp with 4-byte alignment. Without avx512vl the expected code loads the
; value with a separate vmovupd and compares on zmm registers; the SKX run
; expects the load to be used directly as the memory operand of vcmpltpd.
597define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
598; AVX512-LABEL: test31:
599; AVX512:       ## %bb.0:
600; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
601; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
602; AVX512-NEXT:    vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
603; AVX512-NEXT:    vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
604; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
605; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
606; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
607; AVX512-NEXT:    retq ## encoding: [0xc3]
608;
609; SKX-LABEL: test31:
610; SKX:       ## %bb.0:
611; SKX-NEXT:    vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
612; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
613; SKX-NEXT:    retq ## encoding: [0xc3]
614
615  %y = load <2 x double>, <2 x double>* %yp, align 4
616  %mask = fcmp olt <2 x double> %x, %y
617  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
618  ret <2 x double> %max
619}
620
; test31_commute -- same shape as a <2 x double> load-and-compare select, but
; the loaded value is the FIRST fcmp operand (%y olt %x).
; The SKX checks expect the compare to be emitted with the swapped predicate
; (vcmpgtpd) so the load can still be used as the memory operand; the AVX512
; checks expect a separate vmovupd load and a zmm vcmpltpd with the loaded
; register as the left-hand side.
621define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
622; AVX512-LABEL: test31_commute:
623; AVX512:       ## %bb.0:
624; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
625; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
626; AVX512-NEXT:    vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
627; AVX512-NEXT:    vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
628; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
629; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
630; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
631; AVX512-NEXT:    retq ## encoding: [0xc3]
632;
633; SKX-LABEL: test31_commute:
634; SKX:       ## %bb.0:
635; SKX-NEXT:    vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
636; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
637; SKX-NEXT:    retq ## encoding: [0xc3]
638
  ; Loaded value on the left-hand side of the compare.
639  %y = load <2 x double>, <2 x double>* %yp, align 4
640  %mask = fcmp olt <2 x double> %y, %x
641  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
642  ret <2 x double> %max
643}
644
; test32 -- <4 x double> compare against a vector loaded from %yp (align 4),
; written as %y ogt %x (loaded value first), then a select between %x and %x1.
; The SKX checks expect vcmpltpd with the load as a ymm memory operand; the
; AVX512 checks expect a separate vmovupd load and a zmm vcmpltpd.
645define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
646; AVX512-LABEL: test32:
647; AVX512:       ## %bb.0:
648; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
649; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
650; AVX512-NEXT:    vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
651; AVX512-NEXT:    vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
652; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
653; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
654; AVX512-NEXT:    retq ## encoding: [0xc3]
655;
656; SKX-LABEL: test32:
657; SKX:       ## %bb.0:
658; SKX-NEXT:    vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
659; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
660; SKX-NEXT:    retq ## encoding: [0xc3]
661
  ; "%y ogt %x" -- the expected assembly compares %x less-than the loaded value.
662  %y = load <4 x double>, <4 x double>* %yp, align 4
663  %mask = fcmp ogt <4 x double> %y, %x
664  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
665  ret <4 x double> %max
666}
667
; test32_commute -- <4 x double> compare against a vector loaded from %yp
; (align 4), written as %x ogt %y (loaded value second), then a select between
; %x and %x1.
; The SKX checks expect vcmpgtpd with the load as a ymm memory operand; the
; AVX512 checks expect a separate vmovupd load and a zmm vcmpltpd with the
; loaded register as the left-hand side.
668define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
669; AVX512-LABEL: test32_commute:
670; AVX512:       ## %bb.0:
671; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
672; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
673; AVX512-NEXT:    vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
674; AVX512-NEXT:    vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
675; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
676; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
677; AVX512-NEXT:    retq ## encoding: [0xc3]
678;
679; SKX-LABEL: test32_commute:
680; SKX:       ## %bb.0:
681; SKX-NEXT:    vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
682; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
683; SKX-NEXT:    retq ## encoding: [0xc3]
684
  ; Loaded value on the right-hand side of the ogt compare.
685  %y = load <4 x double>, <4 x double>* %yp, align 4
686  %mask = fcmp ogt <4 x double> %x, %y
687  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
688  ret <4 x double> %max
689}
690
; test33 -- <8 x double> compare of %x against a vector loaded from %yp
; (align 4), written as %x olt %y, then a select between %x and %x1.
; All RUN configurations share the same expectation here -- vcmpltpd with the
; load as a zmm memory operand, followed by a masked blend.
691define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
692; CHECK-LABEL: test33:
693; CHECK:       ## %bb.0:
694; CHECK-NEXT:    vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
695; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
696; CHECK-NEXT:    retq ## encoding: [0xc3]
697  %y = load <8 x double>, <8 x double>* %yp, align 4
698  %mask = fcmp olt <8 x double> %x, %y
699  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
700  ret <8 x double> %max
701}
702
; test33_commute -- <8 x double> compare with the loaded vector as the FIRST
; fcmp operand (%y olt %x), then a select between %x and %x1.
; All RUN configurations expect the swapped predicate (vcmpgtpd) with the load
; kept as the zmm memory operand.
703define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
704; CHECK-LABEL: test33_commute:
705; CHECK:       ## %bb.0:
706; CHECK-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
707; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
708; CHECK-NEXT:    retq ## encoding: [0xc3]
709  %y = load <8 x double>, <8 x double>* %yp, align 4
710  %mask = fcmp olt <8 x double> %y, %x
711  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
712  ret <8 x double> %max
713}
714
; test34 -- <4 x float> compare of %x against a vector loaded from %yp
; (align 4), written as %x olt %y, then a select between %x and %x1.
; The AVX512 checks expect a separate vmovups load and a zmm vcmpltps; the
; SKX checks expect the load folded into an xmm vcmpltps.
715define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
716; AVX512-LABEL: test34:
717; AVX512:       ## %bb.0:
718; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
719; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
720; AVX512-NEXT:    vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
721; AVX512-NEXT:    vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
722; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
723; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
724; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
725; AVX512-NEXT:    retq ## encoding: [0xc3]
726;
727; SKX-LABEL: test34:
728; SKX:       ## %bb.0:
729; SKX-NEXT:    vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
730; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
731; SKX-NEXT:    retq ## encoding: [0xc3]
732  %y = load <4 x float>, <4 x float>* %yp, align 4
733  %mask = fcmp olt <4 x float> %x, %y
734  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
735  ret <4 x float> %max
736}
737
; test34_commute -- <4 x float> compare with the loaded vector as the FIRST
; fcmp operand (%y olt %x), then a select between %x and %x1.
; The SKX checks expect the swapped predicate (vcmpgtps) with the load as the
; xmm memory operand; the AVX512 checks expect a separate vmovups load and a
; zmm vcmpltps with the loaded register as the left-hand side.
738define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
739; AVX512-LABEL: test34_commute:
740; AVX512:       ## %bb.0:
741; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
742; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
743; AVX512-NEXT:    vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
744; AVX512-NEXT:    vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
745; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
746; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
747; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
748; AVX512-NEXT:    retq ## encoding: [0xc3]
749;
750; SKX-LABEL: test34_commute:
751; SKX:       ## %bb.0:
752; SKX-NEXT:    vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
753; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
754; SKX-NEXT:    retq ## encoding: [0xc3]
755  %y = load <4 x float>, <4 x float>* %yp, align 4
756  %mask = fcmp olt <4 x float> %y, %x
757  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
758  ret <4 x float> %max
759}
760
; test35 -- <8 x float> compare against a vector loaded from %yp (align 4),
; written as %y ogt %x (loaded value first), then a select between %x and %x1.
; The SKX checks expect vcmpltps with the load as a ymm memory operand; the
; AVX512 checks expect a separate vmovups load and a zmm vcmpltps.
761define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
762; AVX512-LABEL: test35:
763; AVX512:       ## %bb.0:
764; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
765; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
766; AVX512-NEXT:    vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
767; AVX512-NEXT:    vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
768; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
769; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
770; AVX512-NEXT:    retq ## encoding: [0xc3]
771;
772; SKX-LABEL: test35:
773; SKX:       ## %bb.0:
774; SKX-NEXT:    vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
775; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
776; SKX-NEXT:    retq ## encoding: [0xc3]
777
  ; "%y ogt %x" -- the expected assembly compares %x less-than the loaded value.
778  %y = load <8 x float>, <8 x float>* %yp, align 4
779  %mask = fcmp ogt <8 x float> %y, %x
780  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
781  ret <8 x float> %max
782}
783
; test35_commute -- <8 x float> compare against a vector loaded from %yp
; (align 4), written as %x ogt %y (loaded value second), then a select between
; %x and %x1.
; The SKX checks expect vcmpgtps with the load as a ymm memory operand; the
; AVX512 checks expect a separate vmovups load and a zmm vcmpltps with the
; loaded register as the left-hand side.
784define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
785; AVX512-LABEL: test35_commute:
786; AVX512:       ## %bb.0:
787; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
788; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
789; AVX512-NEXT:    vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
790; AVX512-NEXT:    vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
791; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
792; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
793; AVX512-NEXT:    retq ## encoding: [0xc3]
794;
795; SKX-LABEL: test35_commute:
796; SKX:       ## %bb.0:
797; SKX-NEXT:    vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
798; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
799; SKX-NEXT:    retq ## encoding: [0xc3]
800
  ; Loaded value on the right-hand side of the ogt compare.
801  %y = load <8 x float>, <8 x float>* %yp, align 4
802  %mask = fcmp ogt <8 x float> %x, %y
803  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
804  ret <8 x float> %max
805}
806
; test36 -- <16 x float> compare of %x against a vector loaded from %yp
; (align 4), written as %x olt %y, then a select between %x and %x1.
; All RUN configurations expect vcmpltps with the load as a zmm memory
; operand, followed by a masked blend.
807define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
808; CHECK-LABEL: test36:
809; CHECK:       ## %bb.0:
810; CHECK-NEXT:    vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
811; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
812; CHECK-NEXT:    retq ## encoding: [0xc3]
813  %y = load <16 x float>, <16 x float>* %yp, align 4
814  %mask = fcmp olt <16 x float> %x, %y
815  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
816  ret <16 x float> %max
817}
818
; test36_commute -- <16 x float> compare with the loaded vector as the FIRST
; fcmp operand (%y olt %x), then a select between %x and %x1.
; All RUN configurations expect the swapped predicate (vcmpgtps) with the load
; kept as the zmm memory operand.
819define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
820; CHECK-LABEL: test36_commute:
821; CHECK:       ## %bb.0:
822; CHECK-NEXT:    vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
823; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
824; CHECK-NEXT:    retq ## encoding: [0xc3]
825  %y = load <16 x float>, <16 x float>* %yp, align 4
826  %mask = fcmp olt <16 x float> %y, %x
827  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
828  ret <16 x float> %max
829}
830
; test37 -- compare %x against a scalar double loaded from %ptr and splatted
; to <8 x double>, written as splat ogt %x, then a select between %x and %x1.
; All RUN configurations expect vcmpltpd with a {1to8} broadcast memory
; operand instead of a separate broadcast instruction.
831define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
832; CHECK-LABEL: test37:
833; CHECK:       ## %bb.0:
834; CHECK-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
835; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
836; CHECK-NEXT:    retq ## encoding: [0xc3]
837
  ; insertelement into lane 0 + all-zero shuffle mask == splat of %a.
838  %a = load double, double* %ptr
839  %v = insertelement <8 x double> undef, double %a, i32 0
840  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
841
842  %mask = fcmp ogt <8 x double> %shuffle, %x
843  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
844  ret <8 x double> %max
845}
846
; test37_commute -- compare %x against a scalar double loaded from %ptr and
; splatted to <8 x double>, written as %x ogt splat (splat is the second
; operand), then a select between %x and %x1.
; All RUN configurations expect vcmpgtpd with a {1to8} broadcast memory
; operand.
847define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
848; CHECK-LABEL: test37_commute:
849; CHECK:       ## %bb.0:
850; CHECK-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
851; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
852; CHECK-NEXT:    retq ## encoding: [0xc3]
853
  ; insertelement into lane 0 + all-zero shuffle mask == splat of %a.
854  %a = load double, double* %ptr
855  %v = insertelement <8 x double> undef, double %a, i32 0
856  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
857
858  %mask = fcmp ogt <8 x double> %x, %shuffle
859  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
860  ret <8 x double> %max
861}
862
; test38 -- compare %x against a scalar double loaded from %ptr and splatted
; to <4 x double>, written as splat ogt %x, then a select between %x and %x1.
; The AVX512 checks expect the compare widened to zmm with a {1to8} broadcast
; operand; the SKX checks expect a ymm vcmpltpd with a {1to4} broadcast.
863define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
864; AVX512-LABEL: test38:
865; AVX512:       ## %bb.0:
866; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
867; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
868; AVX512-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
869; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
870; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
871; AVX512-NEXT:    retq ## encoding: [0xc3]
872;
873; SKX-LABEL: test38:
874; SKX:       ## %bb.0:
875; SKX-NEXT:    vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
876; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
877; SKX-NEXT:    retq ## encoding: [0xc3]
878
  ; insertelement into lane 0 + all-zero shuffle mask == splat of %a.
879  %a = load double, double* %ptr
880  %v = insertelement <4 x double> undef, double %a, i32 0
881  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
882
883  %mask = fcmp ogt <4 x double> %shuffle, %x
884  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
885  ret <4 x double> %max
886}
887
; test38_commute -- compare %x against a scalar double loaded from %ptr and
; splatted to <4 x double>, written as %x ogt splat (splat is the second
; operand), then a select between %x and %x1.
; The AVX512 checks expect a zmm vcmpgtpd with a {1to8} broadcast operand; the
; SKX checks expect a ymm vcmpgtpd with a {1to4} broadcast.
888define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
889; AVX512-LABEL: test38_commute:
890; AVX512:       ## %bb.0:
891; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
892; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
893; AVX512-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
894; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
895; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
896; AVX512-NEXT:    retq ## encoding: [0xc3]
897;
898; SKX-LABEL: test38_commute:
899; SKX:       ## %bb.0:
900; SKX-NEXT:    vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
901; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
902; SKX-NEXT:    retq ## encoding: [0xc3]
903
  ; insertelement into lane 0 + all-zero shuffle mask == splat of %a.
904  %a = load double, double* %ptr
905  %v = insertelement <4 x double> undef, double %a, i32 0
906  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
907
908  %mask = fcmp ogt <4 x double> %x, %shuffle
909  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
910  ret <4 x double> %max
911}
912
; test39 -- compare %x against a scalar double loaded from %ptr and splatted
; to <2 x double>, written as splat ogt %x, then a select between %x and %x1.
; The AVX512 checks expect the compare widened to zmm with a {1to8} broadcast
; operand; the SKX checks expect an xmm vcmpltpd with a {1to2} broadcast.
913define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
914; AVX512-LABEL: test39:
915; AVX512:       ## %bb.0:
916; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
917; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
918; AVX512-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
919; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
920; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
921; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
922; AVX512-NEXT:    retq ## encoding: [0xc3]
923;
924; SKX-LABEL: test39:
925; SKX:       ## %bb.0:
926; SKX-NEXT:    vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
927; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
928; SKX-NEXT:    retq ## encoding: [0xc3]
929
  ; insertelement into lane 0 + shuffle mask <0, 0> == splat of %a.
930  %a = load double, double* %ptr
931  %v = insertelement <2 x double> undef, double %a, i32 0
932  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
933
934  %mask = fcmp ogt <2 x double> %shuffle, %x
935  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
936  ret <2 x double> %max
937}
938
; test39_commute -- compare %x against a scalar double loaded from %ptr and
; splatted to <2 x double>, written as %x ogt splat (splat is the second
; operand), then a select between %x and %x1.
; The AVX512 checks expect a zmm vcmpgtpd with a {1to8} broadcast operand; the
; SKX checks expect an xmm vcmpgtpd with a {1to2} broadcast.
939define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
940; AVX512-LABEL: test39_commute:
941; AVX512:       ## %bb.0:
942; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
943; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
944; AVX512-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
945; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
946; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
947; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
948; AVX512-NEXT:    retq ## encoding: [0xc3]
949;
950; SKX-LABEL: test39_commute:
951; SKX:       ## %bb.0:
952; SKX-NEXT:    vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
953; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
954; SKX-NEXT:    retq ## encoding: [0xc3]
955
  ; insertelement into lane 0 + shuffle mask <0, 0> == splat of %a.
956  %a = load double, double* %ptr
957  %v = insertelement <2 x double> undef, double %a, i32 0
958  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
959
960  %mask = fcmp ogt <2 x double> %x, %shuffle
961  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
962  ret <2 x double> %max
963}
964
965
; test40 -- compare %x against a scalar float loaded from %ptr and splatted to
; <16 x float>, written as splat ogt %x, then a select between %x and %x1.
; All RUN configurations expect vcmpltps with a {1to16} broadcast memory
; operand.
966define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, float* %ptr) nounwind {
967; CHECK-LABEL: test40:
968; CHECK:       ## %bb.0:
969; CHECK-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
970; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
971; CHECK-NEXT:    retq ## encoding: [0xc3]
972
  ; insertelement into lane 0 + all-zero-index shuffle mask == splat of %a.
973  %a = load float, float* %ptr
974  %v = insertelement <16  x float> undef, float %a, i32 0
975  %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
976
977  %mask = fcmp ogt <16  x float> %shuffle, %x
978  %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
979  ret <16  x float> %max
980}
981
; test40_commute -- compare %x against a scalar float loaded from %ptr and
; splatted to <16 x float>, written as %x ogt splat (splat is the second
; operand), then a select between %x and %x1.
; All RUN configurations expect vcmpgtps with a {1to16} broadcast memory
; operand.
982define <16  x float> @test40_commute(<16  x float> %x, <16  x float> %x1, float* %ptr) nounwind {
983; CHECK-LABEL: test40_commute:
984; CHECK:       ## %bb.0:
985; CHECK-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
986; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
987; CHECK-NEXT:    retq ## encoding: [0xc3]
988
  ; insertelement into lane 0 + all-zero-index shuffle mask == splat of %a.
989  %a = load float, float* %ptr
990  %v = insertelement <16  x float> undef, float %a, i32 0
991  %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
992
993  %mask = fcmp ogt <16  x float> %x, %shuffle
994  %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
995  ret <16  x float> %max
996}
997
; test41 -- compare %x against a scalar float loaded from %ptr and splatted to
; <8 x float>, written as splat ogt %x, then a select between %x and %x1.
; The AVX512 checks expect the compare widened to zmm with a {1to16} broadcast
; operand; the SKX checks expect a ymm vcmpltps with a {1to8} broadcast.
998define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, float* %ptr) nounwind {
999; AVX512-LABEL: test41:
1000; AVX512:       ## %bb.0:
1001; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1002; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
1003; AVX512-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
1004; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1005; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
1006; AVX512-NEXT:    retq ## encoding: [0xc3]
1007;
1008; SKX-LABEL: test41:
1009; SKX:       ## %bb.0:
1010; SKX-NEXT:    vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
1011; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
1012; SKX-NEXT:    retq ## encoding: [0xc3]
1013
  ; insertelement into lane 0 + all-zero-index shuffle mask == splat of %a.
1014  %a = load float, float* %ptr
1015  %v = insertelement <8  x float> undef, float %a, i32 0
1016  %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1017
1018  %mask = fcmp ogt <8  x float> %shuffle, %x
1019  %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
1020  ret <8  x float> %max
1021}
1022
; test41_commute -- compare %x against a scalar float loaded from %ptr and
; splatted to <8 x float>, written as %x ogt splat (splat is the second
; operand), then a select between %x and %x1.
; The AVX512 checks expect a zmm vcmpgtps with a {1to16} broadcast operand;
; the SKX checks expect a ymm vcmpgtps with a {1to8} broadcast.
1023define <8  x float> @test41_commute(<8  x float> %x, <8  x float> %x1, float* %ptr) nounwind {
1024; AVX512-LABEL: test41_commute:
1025; AVX512:       ## %bb.0:
1026; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1027; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
1028; AVX512-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
1029; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1030; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
1031; AVX512-NEXT:    retq ## encoding: [0xc3]
1032;
1033; SKX-LABEL: test41_commute:
1034; SKX:       ## %bb.0:
1035; SKX-NEXT:    vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
1036; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
1037; SKX-NEXT:    retq ## encoding: [0xc3]
1038
  ; insertelement into lane 0 + all-zero-index shuffle mask == splat of %a.
1039  %a = load float, float* %ptr
1040  %v = insertelement <8  x float> undef, float %a, i32 0
1041  %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1042
1043  %mask = fcmp ogt <8  x float> %x, %shuffle
1044  %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
1045  ret <8  x float> %max
1046}
1047
; test42 -- compare %x against a scalar float loaded from %ptr and splatted to
; <4 x float>, written as splat ogt %x, then a select between %x and %x1.
; The AVX512 checks expect the compare widened to zmm with a {1to16} broadcast
; operand; the SKX checks expect an xmm vcmpltps with a {1to4} broadcast.
1048define <4  x float> @test42(<4  x float> %x, <4  x float> %x1, float* %ptr) nounwind {
1049; AVX512-LABEL: test42:
1050; AVX512:       ## %bb.0:
1051; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1052; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
1053; AVX512-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
1054; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1055; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1056; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1057; AVX512-NEXT:    retq ## encoding: [0xc3]
1058;
1059; SKX-LABEL: test42:
1060; SKX:       ## %bb.0:
1061; SKX-NEXT:    vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
1062; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
1063; SKX-NEXT:    retq ## encoding: [0xc3]
1064
  ; insertelement into lane 0 + all-zero-index shuffle mask == splat of %a.
1065  %a = load float, float* %ptr
1066  %v = insertelement <4  x float> undef, float %a, i32 0
1067  %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1068
1069  %mask = fcmp ogt <4  x float> %shuffle, %x
1070  %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
1071  ret <4  x float> %max
1072}
1073
; test42_commute -- compare %x against a scalar float loaded from %ptr and
; splatted to <4 x float>, written as %x ogt splat (splat is the second
; operand), then a select between %x and %x1.
; The AVX512 checks expect a zmm vcmpgtps with a {1to16} broadcast operand;
; the SKX checks expect an xmm vcmpgtps with a {1to4} broadcast.
1074define <4  x float> @test42_commute(<4  x float> %x, <4  x float> %x1, float* %ptr) nounwind {
1075; AVX512-LABEL: test42_commute:
1076; AVX512:       ## %bb.0:
1077; AVX512-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1078; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
1079; AVX512-NEXT:    vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
1080; AVX512-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1081; AVX512-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1082; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1083; AVX512-NEXT:    retq ## encoding: [0xc3]
1084;
1085; SKX-LABEL: test42_commute:
1086; SKX:       ## %bb.0:
1087; SKX-NEXT:    vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
1088; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
1089; SKX-NEXT:    retq ## encoding: [0xc3]
1090
  ; insertelement into lane 0 + all-zero-index shuffle mask == splat of %a.
1091  %a = load float, float* %ptr
1092  %v = insertelement <4  x float> undef, float %a, i32 0
1093  %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1094
1095  %mask = fcmp ogt <4  x float> %x, %shuffle
1096  %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
1097  ret <4  x float> %max
1098}
1099
; test43 -- broadcast compare (splat of a loaded double ogt %x) whose result
; is AND-ed with an incoming <8 x i1> mask before driving the select between
; %x and %x1.
; The KNL checks expect %mask_in to be sign-extended and shifted in a zmm
; register, then tested with vptestmq under the compare result as write-mask.
; The AVX512BW and SKX checks expect %mask_in to be moved into %k1 with
; vpsllw + vpmovw2m and then used as the write-mask of the {1to8} broadcast
; vcmpltpd.
1100define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
1101; KNL-LABEL: test43:
1102; KNL:       ## %bb.0:
1103; KNL-NEXT:    vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
1104; KNL-NEXT:    vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
1105; KNL-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
1106; KNL-NEXT:    vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
1107; KNL-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1108; KNL-NEXT:    retq ## encoding: [0xc3]
1109;
1110; AVX512BW-LABEL: test43:
1111; AVX512BW:       ## %bb.0:
1112; AVX512BW-NEXT:    vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1113; AVX512BW-NEXT:    vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
1114; AVX512BW-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
1115; AVX512BW-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1116; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1117;
1118; SKX-LABEL: test43:
1119; SKX:       ## %bb.0:
1120; SKX-NEXT:    vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1121; SKX-NEXT:    vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
1122; SKX-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
1123; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1124; SKX-NEXT:    retq ## encoding: [0xc3]
1125
  ; insertelement into lane 0 + all-zero shuffle mask == splat of %a.
1126  %a = load double, double* %ptr
1127  %v = insertelement <8 x double> undef, double %a, i32 0
1128  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
1129
  ; Compare result combined with the caller-supplied mask.
1130  %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
1131  %mask = and <8 x i1> %mask_cmp, %mask_in
1132  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
1133  ret <8 x double> %max
1134}
1135
; test43_commute -- broadcast compare written as %x ogt splat (splat of a
; loaded double is the second operand) whose result is AND-ed with an incoming
; <8 x i1> mask before driving the select between %x and %x1.
; The KNL checks expect %mask_in to be sign-extended and shifted in a zmm
; register, then tested with vptestmq under the compare result as write-mask.
; The AVX512BW and SKX checks expect %mask_in to be moved into %k1 with
; vpsllw + vpmovw2m and then used as the write-mask of the {1to8} broadcast
; vcmpgtpd.
1136define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
1137; KNL-LABEL: test43_commute:
1138; KNL:       ## %bb.0:
1139; KNL-NEXT:    vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
1140; KNL-NEXT:    vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
1141; KNL-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
1142; KNL-NEXT:    vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
1143; KNL-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1144; KNL-NEXT:    retq ## encoding: [0xc3]
1145;
1146; AVX512BW-LABEL: test43_commute:
1147; AVX512BW:       ## %bb.0:
1148; AVX512BW-NEXT:    vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1149; AVX512BW-NEXT:    vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
1150; AVX512BW-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
1151; AVX512BW-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1152; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1153;
1154; SKX-LABEL: test43_commute:
1155; SKX:       ## %bb.0:
1156; SKX-NEXT:    vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1157; SKX-NEXT:    vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
1158; SKX-NEXT:    vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
1159; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1160; SKX-NEXT:    retq ## encoding: [0xc3]
1161
  ; insertelement into lane 0 + all-zero shuffle mask == splat of %a.
1162  %a = load double, double* %ptr
1163  %v = insertelement <8 x double> undef, double %a, i32 0
1164  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
1165
  ; Compare result combined with the caller-supplied mask.
1166  %mask_cmp = fcmp ogt <8 x double> %x, %shuffle
1167  %mask = and <8 x i1> %mask_cmp, %mask_in
1168  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
1169  ret <8 x double> %max
1170}
1171
; test44 -- integer equality compare of two <4 x i16> vectors, with the i1
; result sign-extended to <4 x i32>.
; The AVX512 checks expect a VEX vpcmpeqw into an xmm register followed by
; vpmovsxwd; the SKX checks expect vpcmpeqw into mask register %k0 followed by
; vpmovm2d.
; NOTE(review): attribute group #0 is defined outside this part of the file.
1172define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
1173; AVX512-LABEL: test44:
1174; AVX512:       ## %bb.0:
1175; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
1176; AVX512-NEXT:    vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
1177; AVX512-NEXT:    retq ## encoding: [0xc3]
1178;
1179; SKX-LABEL: test44:
1180; SKX:       ## %bb.0:
1181; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1182; SKX-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
1183; SKX-NEXT:    retq ## encoding: [0xc3]
1184  %mask = icmp eq <4 x i16> %x, %y
1185  %1 = sext <4 x i1> %mask to <4 x i32>
1186  ret <4 x i32> %1
1187}
1188
; test45 -- integer equality compare of two <2 x i16> vectors, with the i1
; result zero-extended to <2 x i64>.
; The AVX512 checks expect vpcmpeqw into an xmm register, a vpmovzxwq widening
; and a vpand with a constant-pool operand; the SKX checks expect vpcmpeqw
; into %k0, vpmovm2q, and a vpsrlq by 63.
; NOTE(review): attribute group #0 is defined outside this part of the file.
1189define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
1190; AVX512-LABEL: test45:
1191; AVX512:       ## %bb.0:
1192; AVX512-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
1193; AVX512-NEXT:    vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
1194; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1195; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1196; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1197; AVX512-NEXT:    retq ## encoding: [0xc3]
1198;
1199; SKX-LABEL: test45:
1200; SKX:       ## %bb.0:
1201; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1202; SKX-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1203; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
1204; SKX-NEXT:    retq ## encoding: [0xc3]
1205  %mask = icmp eq <2 x i16> %x, %y
1206  %1 = zext <2 x i1> %mask to <2 x i64>
1207  ret <2 x i64> %1
1208}
1209
; test46 -- ordered-equal floating-point compare of two <2 x float> vectors,
; with the i1 result zero-extended to <2 x i64>.
; The AVX512 checks expect a VEX vcmpeqps into an xmm register, a vpermilps
; shuffle and a vandps with a constant-pool operand; the SKX checks expect
; vcmpeqps into %k0, vpmovm2q, and a vpsrlq by 63.
; NOTE(review): attribute group #0 is defined outside this part of the file.
1210define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
1211; AVX512-LABEL: test46:
1212; AVX512:       ## %bb.0:
1213; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
1214; AVX512-NEXT:    vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
1215; AVX512-NEXT:    ## xmm0 = xmm0[0,1,1,3]
1216; AVX512-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
1217; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1218; AVX512-NEXT:    retq ## encoding: [0xc3]
1219;
1220; SKX-LABEL: test46:
1221; SKX:       ## %bb.0:
1222; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
1223; SKX-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1224; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
1225; SKX-NEXT:    retq ## encoding: [0xc3]
1226  %mask = fcmp oeq <2 x float> %x, %y
1227  %1 = zext <2 x i1> %mask to <2 x i64>
1228  ret <2 x i64> %1
1229}
1230
; Selects between two <16 x i8> vectors using a mask produced by comparing a
; <16 x i32> vector against zero, so the condition has far wider elements than
; the selected data. Expected lowering differs per configuration:
;  - KNL checks: the mask from vptestnmd is turned back into a vector
;    (zero-masked vpternlogd $255), truncated with vpmovdb and fed to vpblendvb.
;  - AVX512BW checks: a masked vpblendmb on zmm registers, with the xmm
;    arguments and result handled through the "kill" register annotations.
;  - SKX checks: a masked vpblendmb directly on xmm registers.
1231define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
1232; KNL-LABEL: test47:
1233; KNL:       ## %bb.0:
1234; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1235; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1236; KNL-NEXT:    vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
1237; KNL-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
1238; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1239; KNL-NEXT:    retq ## encoding: [0xc3]
1240;
1241; AVX512BW-LABEL: test47:
1242; AVX512BW:       ## %bb.0:
1243; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
1244; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1245; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1246; AVX512BW-NEXT:    vpblendmb %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x66,0xc1]
1247; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1248; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1249; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1250;
1251; SKX-LABEL: test47:
1252; SKX:       ## %bb.0:
1253; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1254; SKX-NEXT:    vpblendmb %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x66,0xc1]
1255; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1256; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: lanes of %a equal to zero pick %b, all other lanes pick %c.
1257  %cmp = icmp eq <16 x i32> %a, zeroinitializer
1258  %res = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %c
1259  ret <16 x i8> %res
1260}
1261
; Same shape as test47 but selecting <16 x i16> data with a mask computed from
; a <16 x i32> compare against zero. Expected lowering per configuration:
;  - KNL checks: mask from vptestnmd expanded to a vector (zero-masked
;    vpternlogd $255), truncated with vpmovdw and used by a ymm vpblendvb.
;  - AVX512BW checks: masked vpblendmw on zmm registers, with ymm arguments
;    and result handled through the "kill" register annotations.
;  - SKX checks: masked vpblendmw directly on ymm registers.
1262define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
1263; KNL-LABEL: test48:
1264; KNL:       ## %bb.0:
1265; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1266; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1267; KNL-NEXT:    vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1268; KNL-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ## encoding: [0xc4,0xe3,0x6d,0x4c,0xc1,0x00]
1269; KNL-NEXT:    retq ## encoding: [0xc3]
1270;
1271; AVX512BW-LABEL: test48:
1272; AVX512BW:       ## %bb.0:
1273; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
1274; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1275; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1276; AVX512BW-NEXT:    vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
1277; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
1278; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1279;
1280; SKX-LABEL: test48:
1281; SKX:       ## %bb.0:
1282; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1283; SKX-NEXT:    vpblendmw %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x66,0xc1]
1284; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: lanes of %a equal to zero pick %b, all other lanes pick %c.
1285  %cmp = icmp eq <16 x i32> %a, zeroinitializer
1286  %res = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %c
1287  ret <16 x i16> %res
1288}
1289
; Variant of test47/test48 with an 8-lane mask: the condition comes from a
; <8 x i64> compare against zero and selects between <8 x i16> vectors.
; Expected lowering per configuration:
;  - KNL checks: mask from vptestnmq expanded to a vector (zero-masked
;    vpternlogd $255), truncated with vpmovdw and used by an xmm vpblendvb.
;  - AVX512BW checks: masked vpblendmw on zmm registers, with xmm arguments
;    and result handled through the "kill" register annotations.
;  - SKX checks: masked vpblendmw directly on xmm registers.
1290define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
1291; KNL-LABEL: test49:
1292; KNL:       ## %bb.0:
1293; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1294; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1295; KNL-NEXT:    vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1296; KNL-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
1297; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1298; KNL-NEXT:    retq ## encoding: [0xc3]
1299;
1300; AVX512BW-LABEL: test49:
1301; AVX512BW:       ## %bb.0:
1302; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
1303; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
1304; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1305; AVX512BW-NEXT:    vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
1306; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
1307; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1308; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1309;
1310; SKX-LABEL: test49:
1311; SKX:       ## %bb.0:
1312; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1313; SKX-NEXT:    vpblendmw %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x66,0xc1]
1314; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1315; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: lanes of %a equal to zero pick %b, all other lanes pick %c.
1316  %cmp = icmp eq <8 x i64> %a, zeroinitializer
1317  %res = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %c
1318  ret <8 x i16> %res
1319}
1320
; Equality compare of a <16 x i32> register value against a vector loaded from
; memory, with the load as the second icmp operand. All three configurations
; expect the load to be folded into vpcmpeqd as a (%rdi) memory operand. The
; <16 x i1> result is returned as an i16 by moving the mask register to %eax
; (kmovw in the KNL checks, kmovd in the other two).
1321define i16 @pcmpeq_mem_1(<16 x i32> %a, <16 x i32>* %b) {
1322; KNL-LABEL: pcmpeq_mem_1:
1323; KNL:       ## %bb.0:
1324; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1325; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1326; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
1327; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1328; KNL-NEXT:    retq ## encoding: [0xc3]
1329;
1330; AVX512BW-LABEL: pcmpeq_mem_1:
1331; AVX512BW:       ## %bb.0:
1332; AVX512BW-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1333; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1334; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
1335; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1336; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1337;
1338; SKX-LABEL: pcmpeq_mem_1:
1339; SKX:       ## %bb.0:
1340; SKX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1341; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1342; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
1343; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1344; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: the loaded vector is the right-hand icmp operand.
1345  %load = load <16 x i32>, <16 x i32>* %b
1346  %cmp = icmp eq <16 x i32> %a, %load
1347  %cast = bitcast <16 x i1> %cmp to i16
1348  ret i16 %cast
1349}
1350
1351; Make sure we use the short pcmpeq encoding like the test above when the memory
1352; operand is in the first argument instead of the second.
1353define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) {
1354; KNL-LABEL: pcmpeq_mem_2:
1355; KNL:       ## %bb.0:
1356; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1357; KNL-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1358; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
1359; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1360; KNL-NEXT:    retq ## encoding: [0xc3]
1361;
1362; AVX512BW-LABEL: pcmpeq_mem_2:
1363; AVX512BW:       ## %bb.0:
1364; AVX512BW-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1365; AVX512BW-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1366; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
1367; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1368; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1369;
1370; SKX-LABEL: pcmpeq_mem_2:
1371; SKX:       ## %bb.0:
1372; SKX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1373; SKX-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1374; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
1375; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1376; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: the loaded vector is the left-hand icmp operand here. The
; expected instructions above are the same folded-load vpcmpeqd and mask move
; for every configuration, so operand order must not block the load folding.
1377  %load = load <16 x i32>, <16 x i32>* %b
1378  %cmp = icmp eq <16 x i32> %load, %a
1379  %cast = bitcast <16 x i1> %cmp to i16
1380  ret i16 %cast
1381}
1382
1383; Don't let a degenerate case trigger an infinite loop.
1384; This should get simplified before it even exists as a vselect node,
1385; but that does not happen as of this change.
1386
1387define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
1388; AVX512-LABEL: PR41066:
1389; AVX512:       ## %bb.0:
1390; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x57,0xc0]
1391; AVX512-NEXT:    retq ## encoding: [0xc3]
1392;
1393; SKX-LABEL: PR41066:
1394; SKX:       ## %bb.0:
1395; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
1396; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: both select arms are zero apart from one undef lane, and %t0
; is unused. Every configuration is expected to emit only a register zeroing
; (vxorps) and a return, with no compare or blend left in the output.
1397  %t1 = fcmp ogt <2 x double> %x, %y
1398  %t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
1399  ret <2 x i64> %t2
1400}
1401
; Combines two <4 x i64> compare-against-zero masks with a logical OR,
; zero-extends the <4 x i1> result to <4 x i32> and adds it to %x.
; In the expected output both masks come from vptestnmq and are merged with
; korw. The merged mask is then expanded to a vector (zero-masked vpternlogd
; $255 under the AVX512 prefix, vpmovm2d in the SKX checks), and the final
; arithmetic is a vpsubd of that vector from %xmm2 rather than an add.
1402define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32> %x) {
1403; AVX512-LABEL: zext_bool_logic:
1404; AVX512:       ## %bb.0:
1405; AVX512-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
1406; AVX512-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
1407; AVX512-NEXT:    vptestnmq %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc0]
1408; AVX512-NEXT:    vptestnmq %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x48,0x27,0xc9]
1409; AVX512-NEXT:    korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
1410; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1411; AVX512-NEXT:    vpsubd %xmm0, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0xfa,0xc0]
1412; AVX512-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1413; AVX512-NEXT:    retq ## encoding: [0xc3]
1414;
1415; SKX-LABEL: zext_bool_logic:
1416; SKX:       ## %bb.0:
1417; SKX-NEXT:    vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
1418; SKX-NEXT:    vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
1419; SKX-NEXT:    korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
1420; SKX-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
1421; SKX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
1422; SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1423; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: (cond1 == 0) | (cond2 == 0), zero-extended, then added to %x.
1424  %a = icmp eq <4 x i64> %cond1, zeroinitializer
1425  %b = icmp eq <4 x i64> %cond2, zeroinitializer
1426  %c = or <4 x i1> %a, %b
1427  %d = zext <4 x i1> %c to <4 x i32>
1428  %e = add <4 x i32> %d, %x
1429  ret <4 x i32> %e
1430}
1431
1432; This used to crash in WidenVecRes_SETCC due to generating the wrong
1433; result type.
1434define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
1435; KNL-LABEL: half_vec_compare:
1436; KNL:       ## %bb.0: ## %entry
1437; KNL-NEXT:    movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
1438; KNL-NEXT:    movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
1439; KNL-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
1440; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1441; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
1442; KNL-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1443; KNL-NEXT:    setp %cl ## encoding: [0x0f,0x9a,0xc1]
1444; KNL-NEXT:    setne %dl ## encoding: [0x0f,0x95,0xc2]
1445; KNL-NEXT:    orb %cl, %dl ## encoding: [0x08,0xca]
1446; KNL-NEXT:    andl $1, %edx ## encoding: [0x83,0xe2,0x01]
1447; KNL-NEXT:    kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
1448; KNL-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1449; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1450; KNL-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1451; KNL-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
1452; KNL-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
1453; KNL-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
1454; KNL-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
1455; KNL-NEXT:    kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
1456; KNL-NEXT:    korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
1457; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1458; KNL-NEXT:    vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1459; KNL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
1460; KNL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1461; KNL-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1462; KNL-NEXT:    vpextrw $0, %xmm0, (%rsi) ## encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1463; KNL-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1464; KNL-NEXT:    retq ## encoding: [0xc3]
1465;
1466; AVX512BW-LABEL: half_vec_compare:
1467; AVX512BW:       ## %bb.0: ## %entry
1468; AVX512BW-NEXT:    movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
1469; AVX512BW-NEXT:    movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
1470; AVX512BW-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
1471; AVX512BW-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1472; AVX512BW-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
1473; AVX512BW-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1474; AVX512BW-NEXT:    setp %cl ## encoding: [0x0f,0x9a,0xc1]
1475; AVX512BW-NEXT:    setne %dl ## encoding: [0x0f,0x95,0xc2]
1476; AVX512BW-NEXT:    orb %cl, %dl ## encoding: [0x08,0xca]
1477; AVX512BW-NEXT:    andl $1, %edx ## encoding: [0x83,0xe2,0x01]
1478; AVX512BW-NEXT:    kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
1479; AVX512BW-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1480; AVX512BW-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1481; AVX512BW-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1482; AVX512BW-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
1483; AVX512BW-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
1484; AVX512BW-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
1485; AVX512BW-NEXT:    kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
1486; AVX512BW-NEXT:    kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
1487; AVX512BW-NEXT:    korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
1488; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1489; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
1490; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1491; AVX512BW-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1492; AVX512BW-NEXT:    vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1493; AVX512BW-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1494; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1495;
1496; SKX-LABEL: half_vec_compare:
1497; SKX:       ## %bb.0: ## %entry
1498; SKX-NEXT:    movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
1499; SKX-NEXT:    movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
1500; SKX-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
1501; SKX-NEXT:    vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1502; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
1503; SKX-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1504; SKX-NEXT:    setp %cl ## encoding: [0x0f,0x9a,0xc1]
1505; SKX-NEXT:    setne %dl ## encoding: [0x0f,0x95,0xc2]
1506; SKX-NEXT:    orb %cl, %dl ## encoding: [0x08,0xca]
1507; SKX-NEXT:    kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
1508; SKX-NEXT:    kshiftlb $1, %k0, %k0 ## encoding: [0xc4,0xe3,0x79,0x32,0xc0,0x01]
1509; SKX-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1510; SKX-NEXT:    vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1511; SKX-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1512; SKX-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
1513; SKX-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
1514; SKX-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
1515; SKX-NEXT:    kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
1516; SKX-NEXT:    kshiftlb $7, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x07]
1517; SKX-NEXT:    kshiftrb $7, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x07]
1518; SKX-NEXT:    korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0]
1519; SKX-NEXT:    vpmovm2w %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
1520; SKX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc0]
1521; SKX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1522; SKX-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1523; SKX-NEXT:    vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1524; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: load two half values, compare each against zero with an
; unordered-or-not-equal predicate, zero-extend the two i1 results to i8 and
; store them. In the expected output above each half is handled separately:
; vcvtph2ps converts it to float, vucomiss compares it with a zeroed register,
; and setp/setne/orb form the result bit. The two bits are merged in mask
; registers, and the final vpextrw writes the packed result to (%rsi).
1525entry:
1526  %0 = load <2 x half>, <2 x half>* %x
1527  %1 = fcmp une <2 x half> %0, zeroinitializer
1528  %2 = zext <2 x i1> %1 to <2 x i8>
1529  store <2 x i8> %2, <2 x i8>* %y
1530  ret void
1531}
1532
1533; This test used to end up with the vpcmpgtb on KNL having its operands in the wrong order.
1534define <8 x i64> @cmp_swap_bug(<16 x i8>* %x, <8 x i64> %y, <8 x i64> %z) {
1535; KNL-LABEL: cmp_swap_bug:
1536; KNL:       ## %bb.0: ## %entry
1537; KNL-NEXT:    vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
1538; KNL-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1539; KNL-NEXT:    ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
1540; KNL-NEXT:    ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1541; KNL-NEXT:    vpxor %xmm3, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0xef,0xdb]
1542; KNL-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2 ## encoding: [0xc5,0xe1,0x64,0xd2]
1543; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd2]
1544; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x48,0x27,0xca]
1545; KNL-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1546; KNL-NEXT:    retq ## encoding: [0xc3]
1547;
1548; AVX512BW-LABEL: cmp_swap_bug:
1549; AVX512BW:       ## %bb.0: ## %entry
1550; AVX512BW-NEXT:    vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
1551; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1552; AVX512BW-NEXT:    ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
1553; AVX512BW-NEXT:    ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1554; AVX512BW-NEXT:    vpmovb2m %zmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x29,0xca]
1555; AVX512BW-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1556; AVX512BW-NEXT:    retq ## encoding: [0xc3]
1557;
1558; SKX-LABEL: cmp_swap_bug:
1559; SKX:       ## %bb.0: ## %entry
1560; SKX-NEXT:    vmovdqa (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x17]
1561; SKX-NEXT:    vpmovwb %xmm2, %xmm2 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xd2]
1562; SKX-NEXT:    vpmovb2m %xmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xca]
1563; SKX-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1564; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: take the even-indexed bytes of a loaded <16 x i8>, test each
; for "signed less than zero", and use that mask to select between %y and %z.
; The KNL checks expect vpcmpgtb with the zeroed register (%xmm3) and the
; shuffled data (%xmm2) in the operand order shown above. The other two
; configurations expect the mask to come from vpmovb2m instead of a compare.
1565entry:
1566  %0 = load <16 x i8>, <16 x i8>* %x
1567  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1568  %2 = icmp slt <8 x i8> %1, zeroinitializer
1569  %3 = select <8 x i1> %2, <8 x i64> %y, <8 x i64> %z
1570  ret <8 x i64> %3
1571}
1572
; Select whose condition has wider elements (<2 x i64> compare against zero)
; than its <2 x i32> data, and whose true arm is all zeros: lanes where %x is
; zero become 0, and the remaining lanes take %y.
; Under the AVX512 prefix the expected code compares with vpcmpeqq against a
; zeroed register, narrows the result with vpshufd (xmm0[0,2,2,3]) and applies
; it with vpandn. The SKX checks expect a vptestmq mask feeding a zero-masked
; vmovdqa32 of %xmm1.
1573define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind {
1574; AVX512-LABEL: narrow_cmp_select_reverse:
1575; AVX512:       ## %bb.0:
1576; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
1577; AVX512-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2]
1578; AVX512-NEXT:    vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8]
1579; AVX512-NEXT:    ## xmm0 = xmm0[0,2,2,3]
1580; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdf,0xc1]
1581; AVX512-NEXT:    retq ## encoding: [0xc3]
1582;
1583; SKX-LABEL: narrow_cmp_select_reverse:
1584; SKX:       ## %bb.0:
1585; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
1586; SKX-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
1587; SKX-NEXT:    retq ## encoding: [0xc3]
; IR under test: the zero vector is the "true" operand of the select.
1588  %mask = icmp eq <2 x i64> %x, zeroinitializer
1589  %res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
1590  ret <2 x i32> %res
1591}
1592