1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
6
; Any-of reduction of a <2 x double> 'fcmp ogt' mask that has been
; sign-extended to <2 x i64>. Lane 1 is OR-ed into lane 0 and lane 0 is
; returned, so the result is -1 (all ones) if either lane compared true and 0
; otherwise. The checks expect a single compare + MOVMSKPD + NEG/SBB sequence.
7define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
8; SSE-LABEL: test_v2f64_sext:
9; SSE:       # %bb.0:
10; SSE-NEXT:    cmpltpd %xmm0, %xmm1
11; SSE-NEXT:    movmskpd %xmm1, %eax
12; SSE-NEXT:    negl %eax
13; SSE-NEXT:    sbbq %rax, %rax
14; SSE-NEXT:    retq
15;
16; AVX-LABEL: test_v2f64_sext:
17; AVX:       # %bb.0:
18; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
19; AVX-NEXT:    vmovmskpd %xmm0, %eax
20; AVX-NEXT:    negl %eax
21; AVX-NEXT:    sbbq %rax, %rax
22; AVX-NEXT:    retq
23;
24; AVX512-LABEL: test_v2f64_sext:
25; AVX512:       # %bb.0:
26; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
27; AVX512-NEXT:    vmovmskpd %xmm0, %eax
28; AVX512-NEXT:    negl %eax
29; AVX512-NEXT:    sbbq %rax, %rax
30; AVX512-NEXT:    retq
31  %c = fcmp ogt <2 x double> %a0, %a1
32  %s = sext <2 x i1> %c to <2 x i64>
  ; Single reduction step: bring lane 1 down to lane 0 and OR it in.
33  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
34  %2 = or <2 x i64> %s, %1
35  %3 = extractelement <2 x i64> %2, i32 0
36  ret i64 %3
37}
38
; Any-of reduction of a <4 x double> 'fcmp ogt' mask sign-extended to
; <4 x i64>. Two shuffle+OR steps fold lanes {2,3} onto {0,1} and then lane 1
; onto lane 0; lane 0 (-1 if any lane compared true, else 0) is returned.
39define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
40; SSE-LABEL: test_v4f64_sext:
41; SSE:       # %bb.0:
42; SSE-NEXT:    cmpltpd %xmm1, %xmm3
43; SSE-NEXT:    cmpltpd %xmm0, %xmm2
44; SSE-NEXT:    orpd %xmm3, %xmm2
45; SSE-NEXT:    movmskpd %xmm2, %eax
46; SSE-NEXT:    negl %eax
47; SSE-NEXT:    sbbq %rax, %rax
48; SSE-NEXT:    retq
49;
50; AVX-LABEL: test_v4f64_sext:
51; AVX:       # %bb.0:
52; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
53; AVX-NEXT:    vmovmskpd %ymm0, %eax
54; AVX-NEXT:    negl %eax
55; AVX-NEXT:    sbbq %rax, %rax
56; AVX-NEXT:    vzeroupper
57; AVX-NEXT:    retq
58;
59; AVX512-LABEL: test_v4f64_sext:
60; AVX512:       # %bb.0:
61; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
62; AVX512-NEXT:    vmovmskpd %ymm0, %eax
63; AVX512-NEXT:    negl %eax
64; AVX512-NEXT:    sbbq %rax, %rax
65; AVX512-NEXT:    vzeroupper
66; AVX512-NEXT:    retq
67  %c = fcmp ogt <4 x double> %a0, %a1
68  %s = sext <4 x i1> %c to <4 x i64>
  ; Halving reduction: upper half onto lower half, then lane 1 onto lane 0.
69  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
70  %2 = or <4 x i64> %s, %1
71  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
72  %4 = or <4 x i64> %2, %3
73  %5 = extractelement <4 x i64> %4, i64 0
74  ret i64 %5
75}
76
; Variant of the <4 x double> any-of reduction where the compare mask is
; sign-extended to <4 x i32> (narrower than the compared elements) instead of
; <4 x i64>. The reduction runs on the <4 x i32> vector and the extracted i32
; is then sign-extended to the i64 return value.
; NOTE(review): "legal" presumably refers to the 128-bit <4 x i32> mask type
; being legal on all tested subtargets - confirm against the original commit.
77define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
78; SSE-LABEL: test_v4f64_legal_sext:
79; SSE:       # %bb.0:
80; SSE-NEXT:    cmpltpd %xmm1, %xmm3
81; SSE-NEXT:    cmpltpd %xmm0, %xmm2
82; SSE-NEXT:    packssdw %xmm3, %xmm2
83; SSE-NEXT:    movmskps %xmm2, %eax
84; SSE-NEXT:    negl %eax
85; SSE-NEXT:    sbbq %rax, %rax
86; SSE-NEXT:    retq
87;
88; AVX-LABEL: test_v4f64_legal_sext:
89; AVX:       # %bb.0:
90; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
91; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
92; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
93; AVX-NEXT:    vmovmskps %xmm0, %eax
94; AVX-NEXT:    negl %eax
95; AVX-NEXT:    sbbq %rax, %rax
96; AVX-NEXT:    vzeroupper
97; AVX-NEXT:    retq
98;
99; AVX512-LABEL: test_v4f64_legal_sext:
100; AVX512:       # %bb.0:
101; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
102; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
103; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
104; AVX512-NEXT:    vmovmskps %xmm0, %eax
105; AVX512-NEXT:    negl %eax
106; AVX512-NEXT:    sbbq %rax, %rax
107; AVX512-NEXT:    vzeroupper
108; AVX512-NEXT:    retq
109  %c = fcmp ogt <4 x double> %a0, %a1
110  %s = sext <4 x i1> %c to <4 x i32>
  ; Halving reduction on the narrowed mask, then widen the scalar result.
111  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
112  %2 = or <4 x i32> %s, %1
113  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
114  %4 = or <4 x i32> %2, %3
115  %5 = extractelement <4 x i32> %4, i64 0
116  %6 = sext i32 %5 to i64
117  ret i64 %6
118}
119
; Any-of reduction of a <4 x float> 'fcmp ogt' mask sign-extended to
; <4 x i32>. Two shuffle+OR steps fold the vector down to lane 0, which is
; returned as -1 if any lane compared true and 0 otherwise.
120define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
121; SSE-LABEL: test_v4f32_sext:
122; SSE:       # %bb.0:
123; SSE-NEXT:    cmpltps %xmm0, %xmm1
124; SSE-NEXT:    movmskps %xmm1, %eax
125; SSE-NEXT:    negl %eax
126; SSE-NEXT:    sbbl %eax, %eax
127; SSE-NEXT:    retq
128;
129; AVX-LABEL: test_v4f32_sext:
130; AVX:       # %bb.0:
131; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
132; AVX-NEXT:    vmovmskps %xmm0, %eax
133; AVX-NEXT:    negl %eax
134; AVX-NEXT:    sbbl %eax, %eax
135; AVX-NEXT:    retq
136;
137; AVX512-LABEL: test_v4f32_sext:
138; AVX512:       # %bb.0:
139; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
140; AVX512-NEXT:    vmovmskps %xmm0, %eax
141; AVX512-NEXT:    negl %eax
142; AVX512-NEXT:    sbbl %eax, %eax
143; AVX512-NEXT:    retq
144  %c = fcmp ogt <4 x float> %a0, %a1
145  %s = sext <4 x i1> %c to <4 x i32>
  ; Halving reduction: upper half onto lower half, then lane 1 onto lane 0.
146  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
147  %2 = or <4 x i32> %s, %1
148  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
149  %4 = or <4 x i32> %2, %3
150  %5 = extractelement <4 x i32> %4, i32 0
151  ret i32 %5
152}
153
; Any-of reduction of an <8 x float> 'fcmp ogt' mask sign-extended to
; <8 x i32>. Three shuffle+OR steps (4 lanes, 2 lanes, 1 lane) fold the vector
; down to lane 0, which is returned as -1 if any lane compared true, else 0.
154define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
155; SSE-LABEL: test_v8f32_sext:
156; SSE:       # %bb.0:
157; SSE-NEXT:    cmpltps %xmm1, %xmm3
158; SSE-NEXT:    cmpltps %xmm0, %xmm2
159; SSE-NEXT:    orps %xmm3, %xmm2
160; SSE-NEXT:    movmskps %xmm2, %eax
161; SSE-NEXT:    negl %eax
162; SSE-NEXT:    sbbl %eax, %eax
163; SSE-NEXT:    retq
164;
165; AVX-LABEL: test_v8f32_sext:
166; AVX:       # %bb.0:
167; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
168; AVX-NEXT:    vmovmskps %ymm0, %eax
169; AVX-NEXT:    negl %eax
170; AVX-NEXT:    sbbl %eax, %eax
171; AVX-NEXT:    vzeroupper
172; AVX-NEXT:    retq
173;
174; AVX512-LABEL: test_v8f32_sext:
175; AVX512:       # %bb.0:
176; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
177; AVX512-NEXT:    vmovmskps %ymm0, %eax
178; AVX512-NEXT:    negl %eax
179; AVX512-NEXT:    sbbl %eax, %eax
180; AVX512-NEXT:    vzeroupper
181; AVX512-NEXT:    retq
182  %c = fcmp ogt <8 x float> %a0, %a1
183  %s = sext <8 x i1> %c to <8 x i32>
  ; Halving reduction: each step ORs the upper half of the live lanes onto the lower half.
184  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
185  %2 = or <8 x i32> %s, %1
186  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
187  %4 = or <8 x i32> %2, %3
188  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
189  %6 = or <8 x i32> %4, %5
190  %7 = extractelement <8 x i32> %6, i32 0
191  ret i32 %7
192}
193
; Variant of the <8 x float> any-of reduction where the compare mask is
; sign-extended to <8 x i16> (narrower than the compared elements) instead of
; <8 x i32>. The reduction runs on the <8 x i16> vector and the extracted i16
; is then sign-extended to the i32 return value.
; NOTE(review): "legal" presumably refers to the 128-bit <8 x i16> mask type
; being legal on all tested subtargets - confirm against the original commit.
194define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
195; SSE-LABEL: test_v8f32_legal_sext:
196; SSE:       # %bb.0:
197; SSE-NEXT:    cmpltps %xmm1, %xmm3
198; SSE-NEXT:    cmpltps %xmm0, %xmm2
199; SSE-NEXT:    packssdw %xmm3, %xmm2
200; SSE-NEXT:    pmovmskb %xmm2, %eax
201; SSE-NEXT:    negl %eax
202; SSE-NEXT:    sbbl %eax, %eax
203; SSE-NEXT:    retq
204;
205; AVX-LABEL: test_v8f32_legal_sext:
206; AVX:       # %bb.0:
207; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
208; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
209; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
210; AVX-NEXT:    vpmovmskb %xmm0, %eax
211; AVX-NEXT:    negl %eax
212; AVX-NEXT:    sbbl %eax, %eax
213; AVX-NEXT:    vzeroupper
214; AVX-NEXT:    retq
215;
216; AVX512-LABEL: test_v8f32_legal_sext:
217; AVX512:       # %bb.0:
218; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k0
219; AVX512-NEXT:    vpmovm2w %k0, %xmm0
220; AVX512-NEXT:    vpmovmskb %xmm0, %eax
221; AVX512-NEXT:    negl %eax
222; AVX512-NEXT:    sbbl %eax, %eax
223; AVX512-NEXT:    vzeroupper
224; AVX512-NEXT:    retq
225  %c = fcmp ogt <8 x float> %a0, %a1
226  %s = sext <8 x i1> %c to <8 x i16>
  ; Halving reduction on the narrowed mask, then widen the scalar result.
227  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
228  %2 = or <8 x i16> %s, %1
229  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
230  %4 = or <8 x i16> %2, %3
231  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
232  %6 = or <8 x i16> %4, %5
233  %7 = extractelement <8 x i16> %6, i32 0
234  %8 = sext i16 %7 to i32
235  ret i32 %8
236}
237
; Any-of reduction of a <2 x i64> 'icmp sgt' mask sign-extended to <2 x i64>.
; Lane 1 is OR-ed into lane 0 and lane 0 is returned, so the result is -1 if
; either lane compared true and 0 otherwise.
238define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
239; SSE-LABEL: test_v2i64_sext:
240; SSE:       # %bb.0:
241; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
242; SSE-NEXT:    movmskpd %xmm0, %eax
243; SSE-NEXT:    negl %eax
244; SSE-NEXT:    sbbq %rax, %rax
245; SSE-NEXT:    retq
246;
247; AVX-LABEL: test_v2i64_sext:
248; AVX:       # %bb.0:
249; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
250; AVX-NEXT:    vmovmskpd %xmm0, %eax
251; AVX-NEXT:    negl %eax
252; AVX-NEXT:    sbbq %rax, %rax
253; AVX-NEXT:    retq
254;
255; AVX512-LABEL: test_v2i64_sext:
256; AVX512:       # %bb.0:
257; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
258; AVX512-NEXT:    vmovmskpd %xmm0, %eax
259; AVX512-NEXT:    negl %eax
260; AVX512-NEXT:    sbbq %rax, %rax
261; AVX512-NEXT:    retq
262  %c = icmp sgt <2 x i64> %a0, %a1
263  %s = sext <2 x i1> %c to <2 x i64>
  ; Single reduction step: bring lane 1 down to lane 0 and OR it in.
264  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
265  %2 = or <2 x i64> %s, %1
266  %3 = extractelement <2 x i64> %2, i32 0
267  ret i64 %3
268}
269
; Any-of reduction of a <4 x i64> 'icmp sgt' mask sign-extended to <4 x i64>.
; Two shuffle+OR steps fold the vector down to lane 0, which is returned as -1
; if any lane compared true and 0 otherwise. AVX1 and AVX2 have separate
; check blocks because their expected code differs (split 128-bit compares on
; AVX1 versus a single 256-bit compare on AVX2).
270define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
271; SSE-LABEL: test_v4i64_sext:
272; SSE:       # %bb.0:
273; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
274; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
275; SSE-NEXT:    por %xmm1, %xmm0
276; SSE-NEXT:    movmskpd %xmm0, %eax
277; SSE-NEXT:    negl %eax
278; SSE-NEXT:    sbbq %rax, %rax
279; SSE-NEXT:    retq
280;
281; AVX1-LABEL: test_v4i64_sext:
282; AVX1:       # %bb.0:
283; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
284; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
285; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
286; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
287; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
288; AVX1-NEXT:    vmovmskpd %ymm0, %eax
289; AVX1-NEXT:    negl %eax
290; AVX1-NEXT:    sbbq %rax, %rax
291; AVX1-NEXT:    vzeroupper
292; AVX1-NEXT:    retq
293;
294; AVX2-LABEL: test_v4i64_sext:
295; AVX2:       # %bb.0:
296; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
297; AVX2-NEXT:    vmovmskpd %ymm0, %eax
298; AVX2-NEXT:    negl %eax
299; AVX2-NEXT:    sbbq %rax, %rax
300; AVX2-NEXT:    vzeroupper
301; AVX2-NEXT:    retq
302;
303; AVX512-LABEL: test_v4i64_sext:
304; AVX512:       # %bb.0:
305; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
306; AVX512-NEXT:    vmovmskpd %ymm0, %eax
307; AVX512-NEXT:    negl %eax
308; AVX512-NEXT:    sbbq %rax, %rax
309; AVX512-NEXT:    vzeroupper
310; AVX512-NEXT:    retq
311  %c = icmp sgt <4 x i64> %a0, %a1
312  %s = sext <4 x i1> %c to <4 x i64>
  ; Halving reduction: upper half onto lower half, then lane 1 onto lane 0.
313  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
314  %2 = or <4 x i64> %s, %1
315  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
316  %4 = or <4 x i64> %2, %3
317  %5 = extractelement <4 x i64> %4, i64 0
318  ret i64 %5
319}
320
; Variant of the <4 x i64> any-of reduction where the 'icmp sgt' mask is
; sign-extended to <4 x i32> (narrower than the compared elements) instead of
; <4 x i64>. The reduction runs on the <4 x i32> vector and the extracted i32
; is then sign-extended to the i64 return value.
; NOTE(review): "legal" presumably refers to the 128-bit <4 x i32> mask type
; being legal on all tested subtargets - confirm against the original commit.
321define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
322; SSE-LABEL: test_v4i64_legal_sext:
323; SSE:       # %bb.0:
324; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
325; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
326; SSE-NEXT:    packssdw %xmm1, %xmm0
327; SSE-NEXT:    movmskps %xmm0, %eax
328; SSE-NEXT:    negl %eax
329; SSE-NEXT:    sbbq %rax, %rax
330; SSE-NEXT:    retq
331;
332; AVX1-LABEL: test_v4i64_legal_sext:
333; AVX1:       # %bb.0:
334; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
335; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
336; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
337; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
338; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
339; AVX1-NEXT:    vmovmskps %xmm0, %eax
340; AVX1-NEXT:    negl %eax
341; AVX1-NEXT:    sbbq %rax, %rax
342; AVX1-NEXT:    vzeroupper
343; AVX1-NEXT:    retq
344;
345; AVX2-LABEL: test_v4i64_legal_sext:
346; AVX2:       # %bb.0:
347; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
348; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
349; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
350; AVX2-NEXT:    vmovmskps %xmm0, %eax
351; AVX2-NEXT:    negl %eax
352; AVX2-NEXT:    sbbq %rax, %rax
353; AVX2-NEXT:    vzeroupper
354; AVX2-NEXT:    retq
355;
356; AVX512-LABEL: test_v4i64_legal_sext:
357; AVX512:       # %bb.0:
358; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
359; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
360; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
361; AVX512-NEXT:    vmovmskps %xmm0, %eax
362; AVX512-NEXT:    negl %eax
363; AVX512-NEXT:    sbbq %rax, %rax
364; AVX512-NEXT:    vzeroupper
365; AVX512-NEXT:    retq
366  %c = icmp sgt <4 x i64> %a0, %a1
367  %s = sext <4 x i1> %c to <4 x i32>
  ; Halving reduction on the narrowed mask, then widen the scalar result.
368  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
369  %2 = or <4 x i32> %s, %1
370  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
371  %4 = or <4 x i32> %2, %3
372  %5 = extractelement <4 x i32> %4, i64 0
373  %6 = sext i32 %5 to i64
374  ret i64 %6
375}
376
; Any-of reduction of a <4 x i32> 'icmp sgt' mask sign-extended to <4 x i32>.
; Two shuffle+OR steps fold the vector down to lane 0, which is returned as -1
; if any lane compared true and 0 otherwise.
377define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
378; SSE-LABEL: test_v4i32_sext:
379; SSE:       # %bb.0:
380; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
381; SSE-NEXT:    movmskps %xmm0, %eax
382; SSE-NEXT:    negl %eax
383; SSE-NEXT:    sbbl %eax, %eax
384; SSE-NEXT:    retq
385;
386; AVX-LABEL: test_v4i32_sext:
387; AVX:       # %bb.0:
388; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
389; AVX-NEXT:    vmovmskps %xmm0, %eax
390; AVX-NEXT:    negl %eax
391; AVX-NEXT:    sbbl %eax, %eax
392; AVX-NEXT:    retq
393;
394; AVX512-LABEL: test_v4i32_sext:
395; AVX512:       # %bb.0:
396; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
397; AVX512-NEXT:    vmovmskps %xmm0, %eax
398; AVX512-NEXT:    negl %eax
399; AVX512-NEXT:    sbbl %eax, %eax
400; AVX512-NEXT:    retq
401  %c = icmp sgt <4 x i32> %a0, %a1
402  %s = sext <4 x i1> %c to <4 x i32>
  ; Halving reduction: upper half onto lower half, then lane 1 onto lane 0.
403  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
404  %2 = or <4 x i32> %s, %1
405  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
406  %4 = or <4 x i32> %2, %3
407  %5 = extractelement <4 x i32> %4, i32 0
408  ret i32 %5
409}
410
; Any-of reduction of an <8 x i32> 'icmp sgt' mask sign-extended to <8 x i32>.
; Three shuffle+OR steps (4 lanes, 2 lanes, 1 lane) fold the vector down to
; lane 0, which is returned as -1 if any lane compared true and 0 otherwise.
; AVX1 and AVX2 have separate check blocks because their expected code differs.
411define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
412; SSE-LABEL: test_v8i32_sext:
413; SSE:       # %bb.0:
414; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
415; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
416; SSE-NEXT:    por %xmm1, %xmm0
417; SSE-NEXT:    movmskps %xmm0, %eax
418; SSE-NEXT:    negl %eax
419; SSE-NEXT:    sbbl %eax, %eax
420; SSE-NEXT:    retq
421;
422; AVX1-LABEL: test_v8i32_sext:
423; AVX1:       # %bb.0:
424; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
425; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
426; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
427; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
428; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
429; AVX1-NEXT:    vmovmskps %ymm0, %eax
430; AVX1-NEXT:    negl %eax
431; AVX1-NEXT:    sbbl %eax, %eax
432; AVX1-NEXT:    vzeroupper
433; AVX1-NEXT:    retq
434;
435; AVX2-LABEL: test_v8i32_sext:
436; AVX2:       # %bb.0:
437; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
438; AVX2-NEXT:    vmovmskps %ymm0, %eax
439; AVX2-NEXT:    negl %eax
440; AVX2-NEXT:    sbbl %eax, %eax
441; AVX2-NEXT:    vzeroupper
442; AVX2-NEXT:    retq
443;
444; AVX512-LABEL: test_v8i32_sext:
445; AVX512:       # %bb.0:
446; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
447; AVX512-NEXT:    vmovmskps %ymm0, %eax
448; AVX512-NEXT:    negl %eax
449; AVX512-NEXT:    sbbl %eax, %eax
450; AVX512-NEXT:    vzeroupper
451; AVX512-NEXT:    retq
452  %c = icmp sgt <8 x i32> %a0, %a1
453  %s = sext <8 x i1> %c to <8 x i32>
  ; Halving reduction: each step ORs the upper half of the live lanes onto the lower half.
454  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
455  %2 = or <8 x i32> %s, %1
456  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
457  %4 = or <8 x i32> %2, %3
458  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
459  %6 = or <8 x i32> %4, %5
460  %7 = extractelement <8 x i32> %6, i32 0
461  ret i32 %7
462}
463
; Variant of the <8 x i32> any-of reduction where the 'icmp sgt' mask is
; sign-extended to <8 x i16> (narrower than the compared elements) instead of
; <8 x i32>. The reduction runs on the <8 x i16> vector and the extracted i16
; is then sign-extended to the i32 return value.
; NOTE(review): "legal" presumably refers to the 128-bit <8 x i16> mask type
; being legal on all tested subtargets - confirm against the original commit.
464define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
465; SSE-LABEL: test_v8i32_legal_sext:
466; SSE:       # %bb.0:
467; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
468; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
469; SSE-NEXT:    packssdw %xmm1, %xmm0
470; SSE-NEXT:    pmovmskb %xmm0, %eax
471; SSE-NEXT:    negl %eax
472; SSE-NEXT:    sbbl %eax, %eax
473; SSE-NEXT:    retq
474;
475; AVX1-LABEL: test_v8i32_legal_sext:
476; AVX1:       # %bb.0:
477; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
478; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
479; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
480; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
481; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
482; AVX1-NEXT:    vpmovmskb %xmm0, %eax
483; AVX1-NEXT:    negl %eax
484; AVX1-NEXT:    sbbl %eax, %eax
485; AVX1-NEXT:    vzeroupper
486; AVX1-NEXT:    retq
487;
488; AVX2-LABEL: test_v8i32_legal_sext:
489; AVX2:       # %bb.0:
490; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
491; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
492; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
493; AVX2-NEXT:    vpmovmskb %xmm0, %eax
494; AVX2-NEXT:    negl %eax
495; AVX2-NEXT:    sbbl %eax, %eax
496; AVX2-NEXT:    vzeroupper
497; AVX2-NEXT:    retq
498;
499; AVX512-LABEL: test_v8i32_legal_sext:
500; AVX512:       # %bb.0:
501; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
502; AVX512-NEXT:    vpmovm2w %k0, %xmm0
503; AVX512-NEXT:    vpmovmskb %xmm0, %eax
504; AVX512-NEXT:    negl %eax
505; AVX512-NEXT:    sbbl %eax, %eax
506; AVX512-NEXT:    vzeroupper
507; AVX512-NEXT:    retq
508  %c = icmp sgt <8 x i32> %a0, %a1
509  %s = sext <8 x i1> %c to <8 x i16>
  ; Halving reduction on the narrowed mask, then widen the scalar result.
510  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
511  %2 = or <8 x i16> %s, %1
512  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
513  %4 = or <8 x i16> %2, %3
514  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
515  %6 = or <8 x i16> %4, %5
516  %7 = extractelement <8 x i16> %6, i32 0
517  %8 = sext i16 %7 to i32
518  ret i32 %8
519}
520
; Any-of reduction of an <8 x i16> 'icmp sgt' mask sign-extended to <8 x i16>.
; Three shuffle+OR steps (4 lanes, 2 lanes, 1 lane) fold the vector down to
; lane 0, which is returned as -1 if any lane compared true and 0 otherwise.
521define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
522; SSE-LABEL: test_v8i16_sext:
523; SSE:       # %bb.0:
524; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
525; SSE-NEXT:    pmovmskb %xmm0, %eax
526; SSE-NEXT:    negl %eax
527; SSE-NEXT:    sbbl %eax, %eax
528; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
529; SSE-NEXT:    retq
530;
531; AVX-LABEL: test_v8i16_sext:
532; AVX:       # %bb.0:
533; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
534; AVX-NEXT:    vpmovmskb %xmm0, %eax
535; AVX-NEXT:    negl %eax
536; AVX-NEXT:    sbbl %eax, %eax
537; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
538; AVX-NEXT:    retq
539;
540; AVX512-LABEL: test_v8i16_sext:
541; AVX512:       # %bb.0:
542; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
543; AVX512-NEXT:    vpmovmskb %xmm0, %eax
544; AVX512-NEXT:    negl %eax
545; AVX512-NEXT:    sbbl %eax, %eax
546; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
547; AVX512-NEXT:    retq
548  %c = icmp sgt <8 x i16> %a0, %a1
549  %s = sext <8 x i1> %c to <8 x i16>
  ; Halving reduction: each step ORs the upper half of the live lanes onto the lower half.
550  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
551  %2 = or <8 x i16> %s, %1
552  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
553  %4 = or <8 x i16> %2, %3
554  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
555  %6 = or <8 x i16> %4, %5
556  %7 = extractelement <8 x i16> %6, i32 0
557  ret i16 %7
558}
559
; Any-of reduction of a <16 x i16> 'icmp sgt' mask sign-extended to
; <16 x i16>. Four shuffle+OR steps (8, 4, 2, 1 lanes) fold the vector down to
; lane 0, which is returned as -1 if any lane compared true and 0 otherwise.
; AVX1 and AVX2 have separate check blocks because their expected code differs.
560define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
561; SSE-LABEL: test_v16i16_sext:
562; SSE:       # %bb.0:
563; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
564; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
565; SSE-NEXT:    por %xmm1, %xmm0
566; SSE-NEXT:    pmovmskb %xmm0, %eax
567; SSE-NEXT:    negl %eax
568; SSE-NEXT:    sbbl %eax, %eax
569; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
570; SSE-NEXT:    retq
571;
572; AVX1-LABEL: test_v16i16_sext:
573; AVX1:       # %bb.0:
574; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
575; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
576; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
577; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
578; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
579; AVX1-NEXT:    vpmovmskb %xmm0, %eax
580; AVX1-NEXT:    negl %eax
581; AVX1-NEXT:    sbbl %eax, %eax
582; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
583; AVX1-NEXT:    vzeroupper
584; AVX1-NEXT:    retq
585;
586; AVX2-LABEL: test_v16i16_sext:
587; AVX2:       # %bb.0:
588; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
589; AVX2-NEXT:    vpmovmskb %ymm0, %eax
590; AVX2-NEXT:    negl %eax
591; AVX2-NEXT:    sbbl %eax, %eax
592; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
593; AVX2-NEXT:    vzeroupper
594; AVX2-NEXT:    retq
595;
596; AVX512-LABEL: test_v16i16_sext:
597; AVX512:       # %bb.0:
598; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
599; AVX512-NEXT:    vpmovmskb %ymm0, %eax
600; AVX512-NEXT:    negl %eax
601; AVX512-NEXT:    sbbl %eax, %eax
602; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
603; AVX512-NEXT:    vzeroupper
604; AVX512-NEXT:    retq
605  %c = icmp sgt <16 x i16> %a0, %a1
606  %s = sext <16 x i1> %c to <16 x i16>
  ; Halving reduction: each step ORs the upper half of the live lanes onto the lower half.
607  %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
608  %2 = or <16 x i16> %s, %1
609  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
610  %4 = or <16 x i16> %2, %3
611  %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
612  %6 = or <16 x i16> %4, %5
613  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
614  %8 = or <16 x i16> %6, %7
615  %9 = extractelement <16 x i16> %8, i32 0
616  ret i16 %9
617}
618
; Variant of the <16 x i16> any-of reduction where the 'icmp sgt' mask is
; sign-extended to <16 x i8> (narrower than the compared elements) instead of
; <16 x i16>. The reduction runs on the <16 x i8> vector and the extracted i8
; is then sign-extended to the i16 return value.
; NOTE(review): "legal" presumably refers to the 128-bit <16 x i8> mask type
; being legal on all tested subtargets - confirm against the original commit.
619define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
620; SSE-LABEL: test_v16i16_legal_sext:
621; SSE:       # %bb.0:
622; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
623; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
624; SSE-NEXT:    packsswb %xmm1, %xmm0
625; SSE-NEXT:    pmovmskb %xmm0, %eax
626; SSE-NEXT:    negl %eax
627; SSE-NEXT:    sbbl %eax, %eax
628; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
629; SSE-NEXT:    retq
630;
631; AVX1-LABEL: test_v16i16_legal_sext:
632; AVX1:       # %bb.0:
633; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
634; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
635; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
636; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
637; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
638; AVX1-NEXT:    vpmovmskb %xmm0, %eax
639; AVX1-NEXT:    negl %eax
640; AVX1-NEXT:    sbbl %eax, %eax
641; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
642; AVX1-NEXT:    vzeroupper
643; AVX1-NEXT:    retq
644;
645; AVX2-LABEL: test_v16i16_legal_sext:
646; AVX2:       # %bb.0:
647; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
648; AVX2-NEXT:    vpmovmskb %ymm0, %eax
649; AVX2-NEXT:    negl %eax
650; AVX2-NEXT:    sbbl %eax, %eax
651; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
652; AVX2-NEXT:    vzeroupper
653; AVX2-NEXT:    retq
654;
655; AVX512-LABEL: test_v16i16_legal_sext:
656; AVX512:       # %bb.0:
657; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
658; AVX512-NEXT:    vpmovm2b %k0, %xmm0
659; AVX512-NEXT:    vpmovmskb %xmm0, %eax
660; AVX512-NEXT:    negl %eax
661; AVX512-NEXT:    sbbl %eax, %eax
662; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
663; AVX512-NEXT:    vzeroupper
664; AVX512-NEXT:    retq
665  %c  = icmp sgt <16 x i16> %a0, %a1
666  %s  = sext <16 x i1> %c to <16 x i8>
  ; Halving reduction on the narrowed mask, then widen the scalar result.
667  %1  = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
668  %2  = or <16 x i8> %s, %1
669  %3  = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
670  %4  = or <16 x i8> %2, %3
671  %5  = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
672  %6  = or <16 x i8> %4, %5
673  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
674  %8  = or <16 x i8> %6, %7
675  %9  = extractelement <16 x i8> %8, i32 0
676  %10 = sext i8 %9 to i16
677  ret i16 %10
678}
679
; Any-of reduction of a <16 x i8> 'icmp sgt' mask sign-extended to <16 x i8>.
; Four shuffle+OR steps (8, 4, 2, 1 lanes) fold the vector down to lane 0,
; which is returned as -1 if any lane compared true and 0 otherwise.
680define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
681; SSE-LABEL: test_v16i8_sext:
682; SSE:       # %bb.0:
683; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
684; SSE-NEXT:    pmovmskb %xmm0, %eax
685; SSE-NEXT:    negl %eax
686; SSE-NEXT:    sbbl %eax, %eax
687; SSE-NEXT:    # kill: def $al killed $al killed $eax
688; SSE-NEXT:    retq
689;
690; AVX-LABEL: test_v16i8_sext:
691; AVX:       # %bb.0:
692; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
693; AVX-NEXT:    vpmovmskb %xmm0, %eax
694; AVX-NEXT:    negl %eax
695; AVX-NEXT:    sbbl %eax, %eax
696; AVX-NEXT:    # kill: def $al killed $al killed $eax
697; AVX-NEXT:    retq
698;
699; AVX512-LABEL: test_v16i8_sext:
700; AVX512:       # %bb.0:
701; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
702; AVX512-NEXT:    vpmovmskb %xmm0, %eax
703; AVX512-NEXT:    negl %eax
704; AVX512-NEXT:    sbbl %eax, %eax
705; AVX512-NEXT:    # kill: def $al killed $al killed $eax
706; AVX512-NEXT:    retq
707  %c = icmp sgt <16 x i8> %a0, %a1
708  %s = sext <16 x i1> %c to <16 x i8>
  ; Halving reduction: each step ORs the upper half of the live lanes onto the lower half.
709  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
710  %2 = or <16 x i8> %s, %1
711  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
712  %4 = or <16 x i8> %2, %3
713  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
714  %6 = or <16 x i8> %4, %5
715  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
716  %8 = or <16 x i8> %6, %7
717  %9 = extractelement <16 x i8> %8, i32 0
718  ret i8 %9
719}
720
721define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
722; SSE-LABEL: test_v32i8_sext:
723; SSE:       # %bb.0:
724; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
725; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
726; SSE-NEXT:    por %xmm1, %xmm0
727; SSE-NEXT:    pmovmskb %xmm0, %eax
728; SSE-NEXT:    negl %eax
729; SSE-NEXT:    sbbl %eax, %eax
730; SSE-NEXT:    # kill: def $al killed $al killed $eax
731; SSE-NEXT:    retq
732;
733; AVX1-LABEL: test_v32i8_sext:
734; AVX1:       # %bb.0:
735; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
736; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
737; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
738; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
739; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
740; AVX1-NEXT:    vpmovmskb %xmm0, %eax
741; AVX1-NEXT:    negl %eax
742; AVX1-NEXT:    sbbl %eax, %eax
743; AVX1-NEXT:    # kill: def $al killed $al killed $eax
744; AVX1-NEXT:    vzeroupper
745; AVX1-NEXT:    retq
746;
747; AVX2-LABEL: test_v32i8_sext:
748; AVX2:       # %bb.0:
749; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
750; AVX2-NEXT:    vpmovmskb %ymm0, %eax
751; AVX2-NEXT:    negl %eax
752; AVX2-NEXT:    sbbl %eax, %eax
753; AVX2-NEXT:    # kill: def $al killed $al killed $eax
754; AVX2-NEXT:    vzeroupper
755; AVX2-NEXT:    retq
756;
757; AVX512-LABEL: test_v32i8_sext:
758; AVX512:       # %bb.0:
759; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
760; AVX512-NEXT:    vpmovmskb %ymm0, %eax
761; AVX512-NEXT:    negl %eax
762; AVX512-NEXT:    sbbl %eax, %eax
763; AVX512-NEXT:    # kill: def $al killed $al killed $eax
764; AVX512-NEXT:    vzeroupper
765; AVX512-NEXT:    retq
766  %c  = icmp sgt <32 x i8> %a0, %a1
767  %s  = sext <32 x i1> %c to <32 x i8>
768  %1  = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
769  %2  = or <32 x i8> %s, %1
770  %3  = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
771  %4  = or <32 x i8> %2, %3
772  %5  = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
773  %6  = or <32 x i8> %4, %5
774  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
775  %8  = or <32 x i8> %6, %7
776  %9  = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
777  %10 = or <32 x i8> %8, %9
778  %11 = extractelement <32 x i8> %10, i32 0
779  ret i8 %11
780}
781
; Any-of (OR) reduction of a <2 x i1> compare mask.
; The mask comes from `fcmp ogt` on <2 x double>; lane 1 is shuffled down to
; lane 0, OR'd with the original mask, and lane 0 is returned as the i1 result.
; The assembly expectations below are autogenerated (see the file header) and
; describe a compare, a mask extraction into a GPR, and a test for non-zero.
782define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
783; SSE-LABEL: bool_reduction_v2f64:
784; SSE:       # %bb.0:
785; SSE-NEXT:    cmpltpd %xmm0, %xmm1
786; SSE-NEXT:    movmskpd %xmm1, %eax
787; SSE-NEXT:    testl %eax, %eax
788; SSE-NEXT:    setne %al
789; SSE-NEXT:    retq
790;
791; AVX-LABEL: bool_reduction_v2f64:
792; AVX:       # %bb.0:
793; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
794; AVX-NEXT:    vmovmskpd %xmm0, %eax
795; AVX-NEXT:    testl %eax, %eax
796; AVX-NEXT:    setne %al
797; AVX-NEXT:    retq
798;
799; AVX512-LABEL: bool_reduction_v2f64:
800; AVX512:       # %bb.0:
801; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
802; AVX512-NEXT:    kmovd %k0, %eax
803; AVX512-NEXT:    testb %al, %al
804; AVX512-NEXT:    setne %al
805; AVX512-NEXT:    retq
; IR under test - compare, fold lane 1 into lane 0, extract lane 0.
806  %a = fcmp ogt <2 x double> %x, %y
807  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
808  %c = or <2 x i1> %a, %b
809  %d = extractelement <2 x i1> %c, i32 0
810  ret i1 %d
811}
812
; Any-of (OR) reduction of a <4 x i1> compare mask.
; The mask comes from `fcmp oeq` on <4 x float>. Two shuffle+or steps halve the
; active width each time (lanes 2,3 onto 0,1; then lane 1 onto 0), and lane 0
; is returned as the i1 result.
; The autogenerated expectations below describe a compare, a mask extraction
; into a GPR, and a test for non-zero on every configured target.
813define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
814; SSE-LABEL: bool_reduction_v4f32:
815; SSE:       # %bb.0:
816; SSE-NEXT:    cmpeqps %xmm1, %xmm0
817; SSE-NEXT:    movmskps %xmm0, %eax
818; SSE-NEXT:    testl %eax, %eax
819; SSE-NEXT:    setne %al
820; SSE-NEXT:    retq
821;
822; AVX-LABEL: bool_reduction_v4f32:
823; AVX:       # %bb.0:
824; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
825; AVX-NEXT:    vmovmskps %xmm0, %eax
826; AVX-NEXT:    testl %eax, %eax
827; AVX-NEXT:    setne %al
828; AVX-NEXT:    retq
829;
830; AVX512-LABEL: bool_reduction_v4f32:
831; AVX512:       # %bb.0:
832; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
833; AVX512-NEXT:    kmovd %k0, %eax
834; AVX512-NEXT:    testb %al, %al
835; AVX512-NEXT:    setne %al
836; AVX512-NEXT:    retq
; IR under test - compare, then a two-step shuffle+or tree down to lane 0.
837  %a = fcmp oeq <4 x float> %x, %y
838  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
839  %b = or <4 x i1> %s1, %a
840  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
841  %c = or <4 x i1> %s2, %b
842  %d = extractelement <4 x i1> %c, i32 0
843  ret i1 %d
844}
845
; Any-of (OR) reduction of a <4 x i1> compare mask over a 256-bit input.
; The mask comes from `fcmp oge` on <4 x double>. Two shuffle+or steps fold
; lanes 2,3 onto 0,1 and then lane 1 onto 0; lane 0 is returned as i1.
; In the autogenerated expectations below, the SSE run compares the two
; 128-bit halves separately and packs the results (packssdw) before a single
; movmskps, while the AVX runs compare the whole ymm register at once.
846define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
847; SSE-LABEL: bool_reduction_v4f64:
848; SSE:       # %bb.0:
849; SSE-NEXT:    cmplepd %xmm1, %xmm3
850; SSE-NEXT:    cmplepd %xmm0, %xmm2
851; SSE-NEXT:    packssdw %xmm3, %xmm2
852; SSE-NEXT:    movmskps %xmm2, %eax
853; SSE-NEXT:    testl %eax, %eax
854; SSE-NEXT:    setne %al
855; SSE-NEXT:    retq
856;
857; AVX-LABEL: bool_reduction_v4f64:
858; AVX:       # %bb.0:
859; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
860; AVX-NEXT:    vmovmskpd %ymm0, %eax
861; AVX-NEXT:    testl %eax, %eax
862; AVX-NEXT:    setne %al
863; AVX-NEXT:    vzeroupper
864; AVX-NEXT:    retq
865;
866; AVX512-LABEL: bool_reduction_v4f64:
867; AVX512:       # %bb.0:
868; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %k0
869; AVX512-NEXT:    kmovd %k0, %eax
870; AVX512-NEXT:    testb %al, %al
871; AVX512-NEXT:    setne %al
872; AVX512-NEXT:    vzeroupper
873; AVX512-NEXT:    retq
; IR under test - compare, then a two-step shuffle+or tree down to lane 0.
874  %a = fcmp oge <4 x double> %x, %y
875  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
876  %b = or <4 x i1> %s1, %a
877  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
878  %c = or <4 x i1> %s2, %b
879  %d = extractelement <4 x i1> %c, i32 0
880  ret i1 %d
881}
882
; Any-of (OR) reduction of an <8 x i1> compare mask over a 256-bit input.
; The mask comes from `fcmp une` on <8 x float>. Three shuffle+or steps fold
; lanes 4-7 onto 0-3, lanes 2,3 onto 0,1, and lane 1 onto 0; lane 0 is
; returned as i1.
; In the autogenerated expectations below, the SSE run compares the two
; 128-bit halves, packs them with packssdw and reads the mask with pmovmskb;
; the AVX runs compare the whole ymm register at once.
883define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
884; SSE-LABEL: bool_reduction_v8f32:
885; SSE:       # %bb.0:
886; SSE-NEXT:    cmpneqps %xmm3, %xmm1
887; SSE-NEXT:    cmpneqps %xmm2, %xmm0
888; SSE-NEXT:    packssdw %xmm1, %xmm0
889; SSE-NEXT:    pmovmskb %xmm0, %eax
890; SSE-NEXT:    testl %eax, %eax
891; SSE-NEXT:    setne %al
892; SSE-NEXT:    retq
893;
894; AVX-LABEL: bool_reduction_v8f32:
895; AVX:       # %bb.0:
896; AVX-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm0
897; AVX-NEXT:    vmovmskps %ymm0, %eax
898; AVX-NEXT:    testl %eax, %eax
899; AVX-NEXT:    setne %al
900; AVX-NEXT:    vzeroupper
901; AVX-NEXT:    retq
902;
903; AVX512-LABEL: bool_reduction_v8f32:
904; AVX512:       # %bb.0:
905; AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k0
906; AVX512-NEXT:    kmovd %k0, %eax
907; AVX512-NEXT:    testb %al, %al
908; AVX512-NEXT:    setne %al
909; AVX512-NEXT:    vzeroupper
910; AVX512-NEXT:    retq
; IR under test - compare, then a three-step shuffle+or tree down to lane 0.
911  %a = fcmp une <8 x float> %x, %y
912  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
913  %b = or <8 x i1> %s1, %a
914  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
915  %c = or <8 x i1> %s2, %b
916  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
917  %d = or <8 x i1> %s3, %c
918  %e = extractelement <8 x i1> %d, i32 0
919  ret i1 %e
920}
921
; Any-of (OR) reduction of a <2 x i1> compare mask from an unsigned compare.
; The mask comes from `icmp ugt` on <2 x i64>; lane 1 is shuffled down to
; lane 0, OR'd with the original mask, and lane 0 is returned as i1.
; In the autogenerated expectations below, the SSE and AVX runs XOR both
; operands with the constant 9223372036854775808 (only the top bit set) and
; then use the signed pcmpgtq, i.e. the unsigned compare is expressed through
; a signed one. The AVX512 run uses an unsigned mask compare (vpcmpnleuq).
922define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
923; SSE-LABEL: bool_reduction_v2i64:
924; SSE:       # %bb.0:
925; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
926; SSE-NEXT:    pxor %xmm2, %xmm1
927; SSE-NEXT:    pxor %xmm2, %xmm0
928; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
929; SSE-NEXT:    movmskpd %xmm0, %eax
930; SSE-NEXT:    testl %eax, %eax
931; SSE-NEXT:    setne %al
932; SSE-NEXT:    retq
933;
934; AVX-LABEL: bool_reduction_v2i64:
935; AVX:       # %bb.0:
936; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
937; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
938; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
939; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
940; AVX-NEXT:    vmovmskpd %xmm0, %eax
941; AVX-NEXT:    testl %eax, %eax
942; AVX-NEXT:    setne %al
943; AVX-NEXT:    retq
944;
945; AVX512-LABEL: bool_reduction_v2i64:
946; AVX512:       # %bb.0:
947; AVX512-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
948; AVX512-NEXT:    kmovd %k0, %eax
949; AVX512-NEXT:    testb %al, %al
950; AVX512-NEXT:    setne %al
951; AVX512-NEXT:    retq
; IR under test - compare, fold lane 1 into lane 0, extract lane 0.
952  %a = icmp ugt <2 x i64> %x, %y
953  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
954  %c = or <2 x i1> %a, %b
955  %d = extractelement <2 x i1> %c, i32 0
956  ret i1 %d
957}
958
; Any-of (OR) reduction of a <4 x i1> compare mask.
; The mask comes from `icmp ne` on <4 x i32>. Two shuffle+or steps fold lanes
; 2,3 onto 0,1 and then lane 1 onto 0; lane 0 is returned as i1.
; In the autogenerated expectations below, the SSE and AVX runs use the
; inverted form - compare for equality (pcmpeqd), extract the mask, and return
; true when it differs from 15 (all four lane bits set). "Not every lane is
; equal" gives the same answer as "some lane is not equal". The AVX512 run
; compares for inequality directly (vpcmpneqd) and tests the mask for non-zero.
959define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
960; SSE-LABEL: bool_reduction_v4i32:
961; SSE:       # %bb.0:
962; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
963; SSE-NEXT:    movmskps %xmm0, %eax
964; SSE-NEXT:    cmpl $15, %eax
965; SSE-NEXT:    setne %al
966; SSE-NEXT:    retq
967;
968; AVX-LABEL: bool_reduction_v4i32:
969; AVX:       # %bb.0:
970; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
971; AVX-NEXT:    vmovmskps %xmm0, %eax
972; AVX-NEXT:    cmpl $15, %eax
973; AVX-NEXT:    setne %al
974; AVX-NEXT:    retq
975;
976; AVX512-LABEL: bool_reduction_v4i32:
977; AVX512:       # %bb.0:
978; AVX512-NEXT:    vpcmpneqd %xmm1, %xmm0, %k0
979; AVX512-NEXT:    kmovd %k0, %eax
980; AVX512-NEXT:    testb %al, %al
981; AVX512-NEXT:    setne %al
982; AVX512-NEXT:    retq
; IR under test - compare, then a two-step shuffle+or tree down to lane 0.
983  %a = icmp ne <4 x i32> %x, %y
984  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
985  %b = or <4 x i1> %s1, %a
986  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
987  %c = or <4 x i1> %s2, %b
988  %d = extractelement <4 x i1> %c, i32 0
989  ret i1 %d
990}
991
; Any-of (OR) reduction of an <8 x i1> compare mask.
; The mask comes from `icmp slt` on <8 x i16>. Three shuffle+or steps fold
; lanes 4-7 onto 0-3, lanes 2,3 onto 0,1, and lane 1 onto 0; lane 0 is
; returned as i1.
; In the autogenerated expectations below, "x < y" is emitted as a
; greater-than compare with the operands swapped (pcmpgtw), followed by a mask
; extraction and a test for non-zero.
992define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
993; SSE-LABEL: bool_reduction_v8i16:
994; SSE:       # %bb.0:
995; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
996; SSE-NEXT:    pmovmskb %xmm1, %eax
997; SSE-NEXT:    testl %eax, %eax
998; SSE-NEXT:    setne %al
999; SSE-NEXT:    retq
1000;
1001; AVX-LABEL: bool_reduction_v8i16:
1002; AVX:       # %bb.0:
1003; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
1004; AVX-NEXT:    vpmovmskb %xmm0, %eax
1005; AVX-NEXT:    testl %eax, %eax
1006; AVX-NEXT:    setne %al
1007; AVX-NEXT:    retq
1008;
1009; AVX512-LABEL: bool_reduction_v8i16:
1010; AVX512:       # %bb.0:
1011; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0
1012; AVX512-NEXT:    kmovd %k0, %eax
1013; AVX512-NEXT:    testb %al, %al
1014; AVX512-NEXT:    setne %al
1015; AVX512-NEXT:    retq
; IR under test - compare, then a three-step shuffle+or tree down to lane 0.
1016  %a = icmp slt <8 x i16> %x, %y
1017  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1018  %b = or <8 x i1> %s1, %a
1019  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1020  %c = or <8 x i1> %s2, %b
1021  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1022  %d = or <8 x i1> %s3, %c
1023  %e = extractelement <8 x i1> %d, i32 0
1024  ret i1 %e
1025}
1026
; Any-of (OR) reduction of a <16 x i1> compare mask.
; The mask comes from `icmp sgt` on <16 x i8>. Four shuffle+or steps fold
; lanes 8-15 onto 0-7, 4-7 onto 0-3, 2,3 onto 0,1, and lane 1 onto 0; lane 0
; is returned as i1.
; In the autogenerated expectations below, the SSE and AVX runs use pcmpgtb,
; pmovmskb and a test for non-zero. The AVX512 run tests the 16-bit mask
; register directly with kortestw, without moving it to a GPR.
1027define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1028; SSE-LABEL: bool_reduction_v16i8:
1029; SSE:       # %bb.0:
1030; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
1031; SSE-NEXT:    pmovmskb %xmm0, %eax
1032; SSE-NEXT:    testl %eax, %eax
1033; SSE-NEXT:    setne %al
1034; SSE-NEXT:    retq
1035;
1036; AVX-LABEL: bool_reduction_v16i8:
1037; AVX:       # %bb.0:
1038; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
1039; AVX-NEXT:    vpmovmskb %xmm0, %eax
1040; AVX-NEXT:    testl %eax, %eax
1041; AVX-NEXT:    setne %al
1042; AVX-NEXT:    retq
1043;
1044; AVX512-LABEL: bool_reduction_v16i8:
1045; AVX512:       # %bb.0:
1046; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
1047; AVX512-NEXT:    kortestw %k0, %k0
1048; AVX512-NEXT:    setne %al
1049; AVX512-NEXT:    retq
; IR under test - compare, then a four-step shuffle+or tree down to lane 0.
1050  %a = icmp sgt <16 x i8> %x, %y
1051  %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1052  %b = or <16 x i1> %s1, %a
1053  %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1054  %c = or <16 x i1> %s2, %b
1055  %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1056  %d = or <16 x i1> %s3, %c
1057  %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1058  %e = or <16 x i1> %s4, %d
1059  %f = extractelement <16 x i1> %e, i32 0
1060  ret i1 %f
1061}
1062
; Any-of (OR) reduction of a <4 x i1> compare mask over a 256-bit input.
; The mask comes from `icmp slt` on <4 x i64>. Two shuffle+or steps fold lanes
; 2,3 onto 0,1 and then lane 1 onto 0; lane 0 is returned as i1.
; In the autogenerated expectations below, "x < y" is emitted as pcmpgtq with
; the operands swapped. The SSE run compares the two 128-bit halves and packs
; them (packssdw) before movmskps. The AVX1 run extracts the upper halves
; (vextractf128), compares each half as xmm, and re-joins them
; (vinsertf128) before vmovmskpd. The AVX2 and AVX512 runs compare the whole
; ymm register at once.
1063define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1064; SSE-LABEL: bool_reduction_v4i64:
1065; SSE:       # %bb.0:
1066; SSE-NEXT:    pcmpgtq %xmm1, %xmm3
1067; SSE-NEXT:    pcmpgtq %xmm0, %xmm2
1068; SSE-NEXT:    packssdw %xmm3, %xmm2
1069; SSE-NEXT:    movmskps %xmm2, %eax
1070; SSE-NEXT:    testl %eax, %eax
1071; SSE-NEXT:    setne %al
1072; SSE-NEXT:    retq
1073;
1074; AVX1-LABEL: bool_reduction_v4i64:
1075; AVX1:       # %bb.0:
1076; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1077; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1078; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1079; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
1080; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1081; AVX1-NEXT:    vmovmskpd %ymm0, %eax
1082; AVX1-NEXT:    testl %eax, %eax
1083; AVX1-NEXT:    setne %al
1084; AVX1-NEXT:    vzeroupper
1085; AVX1-NEXT:    retq
1086;
1087; AVX2-LABEL: bool_reduction_v4i64:
1088; AVX2:       # %bb.0:
1089; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
1090; AVX2-NEXT:    vmovmskpd %ymm0, %eax
1091; AVX2-NEXT:    testl %eax, %eax
1092; AVX2-NEXT:    setne %al
1093; AVX2-NEXT:    vzeroupper
1094; AVX2-NEXT:    retq
1095;
1096; AVX512-LABEL: bool_reduction_v4i64:
1097; AVX512:       # %bb.0:
1098; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
1099; AVX512-NEXT:    kmovd %k0, %eax
1100; AVX512-NEXT:    testb %al, %al
1101; AVX512-NEXT:    setne %al
1102; AVX512-NEXT:    vzeroupper
1103; AVX512-NEXT:    retq
; IR under test - compare, then a two-step shuffle+or tree down to lane 0.
1104  %a = icmp slt <4 x i64> %x, %y
1105  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1106  %b = or <4 x i1> %s1, %a
1107  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1108  %c = or <4 x i1> %s2, %b
1109  %d = extractelement <4 x i1> %c, i32 0
1110  ret i1 %d
1111}
1112
; Any-of (OR) reduction of an <8 x i1> compare mask from an unsigned compare.
; The mask comes from `icmp ule` on <8 x i32>. Three shuffle+or steps fold
; lanes 4-7 onto 0-3, lanes 2,3 onto 0,1, and lane 1 onto 0; lane 0 is
; returned as i1.
; In the autogenerated expectations below, the SSE, AVX1 and AVX2 runs express
; "x <= y (unsigned)" as an unsigned minimum followed by an equality compare
; against x (pminud + pcmpeqd). The SSE and AVX1 runs do this per 128-bit
; half. The AVX512 run uses an unsigned mask compare (vpcmpleud).
1113define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1114; SSE-LABEL: bool_reduction_v8i32:
1115; SSE:       # %bb.0:
1116; SSE-NEXT:    pminud %xmm1, %xmm3
1117; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
1118; SSE-NEXT:    pminud %xmm0, %xmm2
1119; SSE-NEXT:    pcmpeqd %xmm0, %xmm2
1120; SSE-NEXT:    packssdw %xmm3, %xmm2
1121; SSE-NEXT:    pmovmskb %xmm2, %eax
1122; SSE-NEXT:    testl %eax, %eax
1123; SSE-NEXT:    setne %al
1124; SSE-NEXT:    retq
1125;
1126; AVX1-LABEL: bool_reduction_v8i32:
1127; AVX1:       # %bb.0:
1128; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1129; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1130; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
1131; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
1132; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm1
1133; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1134; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1135; AVX1-NEXT:    vmovmskps %ymm0, %eax
1136; AVX1-NEXT:    testl %eax, %eax
1137; AVX1-NEXT:    setne %al
1138; AVX1-NEXT:    vzeroupper
1139; AVX1-NEXT:    retq
1140;
1141; AVX2-LABEL: bool_reduction_v8i32:
1142; AVX2:       # %bb.0:
1143; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm1
1144; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
1145; AVX2-NEXT:    vmovmskps %ymm0, %eax
1146; AVX2-NEXT:    testl %eax, %eax
1147; AVX2-NEXT:    setne %al
1148; AVX2-NEXT:    vzeroupper
1149; AVX2-NEXT:    retq
1150;
1151; AVX512-LABEL: bool_reduction_v8i32:
1152; AVX512:       # %bb.0:
1153; AVX512-NEXT:    vpcmpleud %ymm1, %ymm0, %k0
1154; AVX512-NEXT:    kmovd %k0, %eax
1155; AVX512-NEXT:    testb %al, %al
1156; AVX512-NEXT:    setne %al
1157; AVX512-NEXT:    vzeroupper
1158; AVX512-NEXT:    retq
; IR under test - compare, then a three-step shuffle+or tree down to lane 0.
1159  %a = icmp ule <8 x i32> %x, %y
1160  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1161  %b = or <8 x i1> %s1, %a
1162  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1163  %c = or <8 x i1> %s2, %b
1164  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1165  %d = or <8 x i1> %s3, %c
1166  %e = extractelement <8 x i1> %d, i32 0
1167  ret i1 %e
1168}
1169
; Any-of (OR) reduction of a <16 x i1> compare mask over a 256-bit input.
; The mask comes from `icmp eq` on <16 x i16>. Four shuffle+or steps fold
; lanes 8-15 onto 0-7, 4-7 onto 0-3, 2,3 onto 0,1, and lane 1 onto 0; lane 0
; is returned as i1.
; In the autogenerated expectations below, the SSE and AVX1 runs compare the
; two 128-bit halves and pack the word results to bytes (packsswb) so that a
; single pmovmskb covers all 16 lanes. The AVX2 run compares the whole ymm
; register and uses vpmovmskb on it. The AVX512 run tests the 16-bit mask
; register directly with kortestw.
1170define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1171; SSE-LABEL: bool_reduction_v16i16:
1172; SSE:       # %bb.0:
1173; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
1174; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
1175; SSE-NEXT:    packsswb %xmm1, %xmm0
1176; SSE-NEXT:    pmovmskb %xmm0, %eax
1177; SSE-NEXT:    testl %eax, %eax
1178; SSE-NEXT:    setne %al
1179; SSE-NEXT:    retq
1180;
1181; AVX1-LABEL: bool_reduction_v16i16:
1182; AVX1:       # %bb.0:
1183; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1184; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1185; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
1186; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
1187; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
1188; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1189; AVX1-NEXT:    testl %eax, %eax
1190; AVX1-NEXT:    setne %al
1191; AVX1-NEXT:    vzeroupper
1192; AVX1-NEXT:    retq
1193;
1194; AVX2-LABEL: bool_reduction_v16i16:
1195; AVX2:       # %bb.0:
1196; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
1197; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1198; AVX2-NEXT:    testl %eax, %eax
1199; AVX2-NEXT:    setne %al
1200; AVX2-NEXT:    vzeroupper
1201; AVX2-NEXT:    retq
1202;
1203; AVX512-LABEL: bool_reduction_v16i16:
1204; AVX512:       # %bb.0:
1205; AVX512-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
1206; AVX512-NEXT:    kortestw %k0, %k0
1207; AVX512-NEXT:    setne %al
1208; AVX512-NEXT:    vzeroupper
1209; AVX512-NEXT:    retq
; IR under test - compare, then a four-step shuffle+or tree down to lane 0.
1210  %a = icmp eq <16 x i16> %x, %y
1211  %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1212  %b = or <16 x i1> %s1, %a
1213  %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1214  %c = or <16 x i1> %s2, %b
1215  %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1216  %d = or <16 x i1> %s3, %c
1217  %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1218  %e = or <16 x i1> %s4, %d
1219  %f = extractelement <16 x i1> %e, i32 0
1220  ret i1 %f
1221}
1222
1223define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1224; SSE-LABEL: bool_reduction_v32i8:
1225; SSE:       # %bb.0:
1226; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
1227; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
1228; SSE-NEXT:    por %xmm1, %xmm0
1229; SSE-NEXT:    pmovmskb %xmm0, %eax
1230; SSE-NEXT:    testl %eax, %eax
1231; SSE-NEXT:    setne %al
1232; SSE-NEXT:    retq
1233;
1234; AVX1-LABEL: bool_reduction_v32i8:
1235; AVX1:       # %bb.0:
1236; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1237; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1238; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
1239; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1240; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
1241; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1242; AVX1-NEXT:    testl %eax, %eax
1243; AVX1-NEXT:    setne %al
1244; AVX1-NEXT:    vzeroupper
1245; AVX1-NEXT:    retq
1246;
1247; AVX2-LABEL: bool_reduction_v32i8:
1248; AVX2:       # %bb.0:
1249; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
1250; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1251; AVX2-NEXT:    testl %eax, %eax
1252; AVX2-NEXT:    setne %al
1253; AVX2-NEXT:    vzeroupper
1254; AVX2-NEXT:    retq
1255;
1256; AVX512-LABEL: bool_reduction_v32i8:
1257; AVX512:       # %bb.0:
1258; AVX512-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
1259; AVX512-NEXT:    kortestd %k0, %k0
1260; AVX512-NEXT:    setne %al
1261; AVX512-NEXT:    vzeroupper
1262; AVX512-NEXT:    retq
1263  %a = icmp eq <32 x i8> %x, %y
1264  %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1265  %b = or <32 x i1> %s1, %a
1266  %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1267  %c = or <32 x i1> %s2, %b
1268  %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1269  %d = or <32 x i1> %s3, %c
1270  %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1271  %e = or <32 x i1> %s4, %d
1272  %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1273  %f = or <32 x i1> %s5, %e
1274  %g = extractelement <32 x i1> %f, i32 0
1275  ret i1 %g
1276}
1277