1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
6
; "All lanes true" reduction over a <2 x double> ordered greater-than compare.
; Each i1 lane is sign-extended to i64 (0 or -1), the two lanes are ANDed
; together and lane 0 is returned, so the result is -1 only when %a0 > %a1
; holds in every lane and 0 otherwise.
; The assertions below expect a MOVMSKPD of the compare mask, a compare
; against 3 (both lane bits set), and sete/neg to materialize 0 or -1.
7define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
8; SSE-LABEL: test_v2f64_sext:
9; SSE:       # %bb.0:
10; SSE-NEXT:    cmpltpd %xmm0, %xmm1
11; SSE-NEXT:    movmskpd %xmm1, %ecx
12; SSE-NEXT:    xorl %eax, %eax
13; SSE-NEXT:    cmpl $3, %ecx
14; SSE-NEXT:    sete %al
15; SSE-NEXT:    negq %rax
16; SSE-NEXT:    retq
17;
18; AVX-LABEL: test_v2f64_sext:
19; AVX:       # %bb.0:
20; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
21; AVX-NEXT:    vmovmskpd %xmm0, %ecx
22; AVX-NEXT:    xorl %eax, %eax
23; AVX-NEXT:    cmpl $3, %ecx
24; AVX-NEXT:    sete %al
25; AVX-NEXT:    negq %rax
26; AVX-NEXT:    retq
27;
28; AVX512-LABEL: test_v2f64_sext:
29; AVX512:       # %bb.0:
30; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
31; AVX512-NEXT:    vmovmskpd %xmm0, %ecx
32; AVX512-NEXT:    xorl %eax, %eax
33; AVX512-NEXT:    cmpl $3, %ecx
34; AVX512-NEXT:    sete %al
35; AVX512-NEXT:    negq %rax
36; AVX512-NEXT:    retq
; Single shuffle+and step: lane 1 is moved onto lane 0 and ANDed with it.
37  %c = fcmp ogt <2 x double> %a0, %a1
38  %s = sext <2 x i1> %c to <2 x i64>
39  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
40  %2 = and <2 x i64> %s, %1
41  %3 = extractelement <2 x i64> %2, i32 0
42  ret i64 %3
43}
44
; "All lanes true" reduction over a <4 x double> ordered greater-than compare.
; The i1 mask is sign-extended to <4 x i64> and folded with two shuffle+and
; steps (upper half onto lower half, then lane 1 onto lane 0); lane 0 is
; returned, giving -1 when every lane compares true and 0 otherwise.
; With SSE4.2 the assertions expect the two 128-bit compare results to be
; ANDed before MOVMSKPD (mask compared against 3); the AVX and AVX512 runs
; expect a 256-bit MOVMSKPD with the mask compared against 15.
45define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
46; SSE-LABEL: test_v4f64_sext:
47; SSE:       # %bb.0:
48; SSE-NEXT:    cmpltpd %xmm1, %xmm3
49; SSE-NEXT:    cmpltpd %xmm0, %xmm2
50; SSE-NEXT:    andpd %xmm3, %xmm2
51; SSE-NEXT:    movmskpd %xmm2, %ecx
52; SSE-NEXT:    xorl %eax, %eax
53; SSE-NEXT:    cmpl $3, %ecx
54; SSE-NEXT:    sete %al
55; SSE-NEXT:    negq %rax
56; SSE-NEXT:    retq
57;
58; AVX-LABEL: test_v4f64_sext:
59; AVX:       # %bb.0:
60; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
61; AVX-NEXT:    vmovmskpd %ymm0, %ecx
62; AVX-NEXT:    xorl %eax, %eax
63; AVX-NEXT:    cmpl $15, %ecx
64; AVX-NEXT:    sete %al
65; AVX-NEXT:    negq %rax
66; AVX-NEXT:    vzeroupper
67; AVX-NEXT:    retq
68;
69; AVX512-LABEL: test_v4f64_sext:
70; AVX512:       # %bb.0:
71; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
72; AVX512-NEXT:    vmovmskpd %ymm0, %ecx
73; AVX512-NEXT:    xorl %eax, %eax
74; AVX512-NEXT:    cmpl $15, %ecx
75; AVX512-NEXT:    sete %al
76; AVX512-NEXT:    negq %rax
77; AVX512-NEXT:    vzeroupper
78; AVX512-NEXT:    retq
; Two-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
79  %c = fcmp ogt <4 x double> %a0, %a1
80  %s = sext <4 x i1> %c to <4 x i64>
81  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
82  %2 = and <4 x i64> %s, %1
83  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
84  %4 = and <4 x i64> %2, %3
85  %5 = extractelement <4 x i64> %4, i64 0
86  ret i64 %5
87}
88
; Variant of the <4 x double> "all lanes true" reduction in which the compare
; mask is sign-extended to the narrower <4 x i32> (a 128-bit vector) instead
; of <4 x i64>. The reduction runs on i32 lanes and the extracted i32 result
; is sign-extended to the i64 return value (-1 if all lanes true, else 0).
; The SSE4.2 and AVX runs are expected to narrow the mask with PACKSSDW; the
; AVX512 run is expected to compare into a mask register and expand it with
; a zero-masked move. All runs then use MOVMSKPS and compare against 15.
89define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
90; SSE-LABEL: test_v4f64_legal_sext:
91; SSE:       # %bb.0:
92; SSE-NEXT:    cmpltpd %xmm1, %xmm3
93; SSE-NEXT:    cmpltpd %xmm0, %xmm2
94; SSE-NEXT:    packssdw %xmm3, %xmm2
95; SSE-NEXT:    movmskps %xmm2, %ecx
96; SSE-NEXT:    xorl %eax, %eax
97; SSE-NEXT:    cmpl $15, %ecx
98; SSE-NEXT:    sete %al
99; SSE-NEXT:    negq %rax
100; SSE-NEXT:    retq
101;
102; AVX-LABEL: test_v4f64_legal_sext:
103; AVX:       # %bb.0:
104; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
105; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
106; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
107; AVX-NEXT:    vmovmskps %xmm0, %ecx
108; AVX-NEXT:    xorl %eax, %eax
109; AVX-NEXT:    cmpl $15, %ecx
110; AVX-NEXT:    sete %al
111; AVX-NEXT:    negq %rax
112; AVX-NEXT:    vzeroupper
113; AVX-NEXT:    retq
114;
115; AVX512-LABEL: test_v4f64_legal_sext:
116; AVX512:       # %bb.0:
117; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
118; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
119; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
120; AVX512-NEXT:    vmovmskps %xmm0, %ecx
121; AVX512-NEXT:    xorl %eax, %eax
122; AVX512-NEXT:    cmpl $15, %ecx
123; AVX512-NEXT:    sete %al
124; AVX512-NEXT:    negq %rax
125; AVX512-NEXT:    vzeroupper
126; AVX512-NEXT:    retq
; Mask is narrowed to i32 lanes before the shuffle+and tree; the scalar
; result is widened back to i64 by the trailing sext.
127  %c = fcmp ogt <4 x double> %a0, %a1
128  %s = sext <4 x i1> %c to <4 x i32>
129  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
130  %2 = and <4 x i32> %s, %1
131  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
132  %4 = and <4 x i32> %2, %3
133  %5 = extractelement <4 x i32> %4, i64 0
134  %6 = sext i32 %5 to i64
135  ret i64 %6
136}
137
; "All lanes true" reduction over a <4 x float> ordered greater-than compare.
; The i1 mask is sign-extended to <4 x i32>, folded with two shuffle+and
; steps, and lane 0 is returned: -1 when every lane compares true, else 0.
; All runs are expected to use MOVMSKPS, compare the mask against 15, and
; materialize the result with sete/negl.
138define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
139; SSE-LABEL: test_v4f32_sext:
140; SSE:       # %bb.0:
141; SSE-NEXT:    cmpltps %xmm0, %xmm1
142; SSE-NEXT:    movmskps %xmm1, %ecx
143; SSE-NEXT:    xorl %eax, %eax
144; SSE-NEXT:    cmpl $15, %ecx
145; SSE-NEXT:    sete %al
146; SSE-NEXT:    negl %eax
147; SSE-NEXT:    retq
148;
149; AVX-LABEL: test_v4f32_sext:
150; AVX:       # %bb.0:
151; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
152; AVX-NEXT:    vmovmskps %xmm0, %ecx
153; AVX-NEXT:    xorl %eax, %eax
154; AVX-NEXT:    cmpl $15, %ecx
155; AVX-NEXT:    sete %al
156; AVX-NEXT:    negl %eax
157; AVX-NEXT:    retq
158;
159; AVX512-LABEL: test_v4f32_sext:
160; AVX512:       # %bb.0:
161; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
162; AVX512-NEXT:    vmovmskps %xmm0, %ecx
163; AVX512-NEXT:    xorl %eax, %eax
164; AVX512-NEXT:    cmpl $15, %ecx
165; AVX512-NEXT:    sete %al
166; AVX512-NEXT:    negl %eax
167; AVX512-NEXT:    retq
; Two-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
168  %c = fcmp ogt <4 x float> %a0, %a1
169  %s = sext <4 x i1> %c to <4 x i32>
170  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
171  %2 = and <4 x i32> %s, %1
172  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
173  %4 = and <4 x i32> %2, %3
174  %5 = extractelement <4 x i32> %4, i32 0
175  ret i32 %5
176}
177
; "All lanes true" reduction over an <8 x float> ordered greater-than compare.
; The i1 mask is sign-extended to <8 x i32> and folded with three shuffle+and
; steps (4 lanes, 2 lanes, 1 lane); lane 0 is returned, giving -1 when every
; lane compares true and 0 otherwise.
; With SSE4.2 the two 128-bit compare results are expected to be ANDed before
; MOVMSKPS (mask compared against 15); the AVX and AVX512 runs expect a
; 256-bit MOVMSKPS with the mask compared against 255.
178define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
179; SSE-LABEL: test_v8f32_sext:
180; SSE:       # %bb.0:
181; SSE-NEXT:    cmpltps %xmm1, %xmm3
182; SSE-NEXT:    cmpltps %xmm0, %xmm2
183; SSE-NEXT:    andps %xmm3, %xmm2
184; SSE-NEXT:    movmskps %xmm2, %ecx
185; SSE-NEXT:    xorl %eax, %eax
186; SSE-NEXT:    cmpl $15, %ecx
187; SSE-NEXT:    sete %al
188; SSE-NEXT:    negl %eax
189; SSE-NEXT:    retq
190;
191; AVX-LABEL: test_v8f32_sext:
192; AVX:       # %bb.0:
193; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
194; AVX-NEXT:    vmovmskps %ymm0, %ecx
195; AVX-NEXT:    xorl %eax, %eax
196; AVX-NEXT:    cmpl $255, %ecx
197; AVX-NEXT:    sete %al
198; AVX-NEXT:    negl %eax
199; AVX-NEXT:    vzeroupper
200; AVX-NEXT:    retq
201;
202; AVX512-LABEL: test_v8f32_sext:
203; AVX512:       # %bb.0:
204; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
205; AVX512-NEXT:    vmovmskps %ymm0, %ecx
206; AVX512-NEXT:    xorl %eax, %eax
207; AVX512-NEXT:    cmpl $255, %ecx
208; AVX512-NEXT:    sete %al
209; AVX512-NEXT:    negl %eax
210; AVX512-NEXT:    vzeroupper
211; AVX512-NEXT:    retq
; Three-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
212  %c = fcmp ogt <8 x float> %a0, %a1
213  %s = sext <8 x i1> %c to <8 x i32>
214  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
215  %2 = and <8 x i32> %s, %1
216  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
217  %4 = and <8 x i32> %2, %3
218  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
219  %6 = and <8 x i32> %4, %5
220  %7 = extractelement <8 x i32> %6, i32 0
221  ret i32 %7
222}
223
; Variant of the <8 x float> "all lanes true" reduction in which the compare
; mask is sign-extended to the narrower <8 x i16> (a 128-bit vector) instead
; of <8 x i32>. The reduction runs on i16 lanes and the extracted i16 result
; is sign-extended to the i32 return value (-1 if all lanes true, else 0).
; The SSE4.2 and AVX runs are expected to narrow the mask with PACKSSDW; the
; AVX512 run is expected to compare into a mask register and expand it with
; vpmovm2w. All runs then use PMOVMSKB and compare against 0xFFFF.
224define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
225; SSE-LABEL: test_v8f32_legal_sext:
226; SSE:       # %bb.0:
227; SSE-NEXT:    cmpltps %xmm1, %xmm3
228; SSE-NEXT:    cmpltps %xmm0, %xmm2
229; SSE-NEXT:    packssdw %xmm3, %xmm2
230; SSE-NEXT:    pmovmskb %xmm2, %ecx
231; SSE-NEXT:    xorl %eax, %eax
232; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
233; SSE-NEXT:    sete %al
234; SSE-NEXT:    negl %eax
235; SSE-NEXT:    retq
236;
237; AVX-LABEL: test_v8f32_legal_sext:
238; AVX:       # %bb.0:
239; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
240; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
241; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
242; AVX-NEXT:    vpmovmskb %xmm0, %ecx
243; AVX-NEXT:    xorl %eax, %eax
244; AVX-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
245; AVX-NEXT:    sete %al
246; AVX-NEXT:    negl %eax
247; AVX-NEXT:    vzeroupper
248; AVX-NEXT:    retq
249;
250; AVX512-LABEL: test_v8f32_legal_sext:
251; AVX512:       # %bb.0:
252; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k0
253; AVX512-NEXT:    vpmovm2w %k0, %xmm0
254; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
255; AVX512-NEXT:    xorl %eax, %eax
256; AVX512-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
257; AVX512-NEXT:    sete %al
258; AVX512-NEXT:    negl %eax
259; AVX512-NEXT:    vzeroupper
260; AVX512-NEXT:    retq
; Mask is narrowed to i16 lanes before the shuffle+and tree; the scalar
; result is widened back to i32 by the trailing sext.
261  %c = fcmp ogt <8 x float> %a0, %a1
262  %s = sext <8 x i1> %c to <8 x i16>
263  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
264  %2 = and <8 x i16> %s, %1
265  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
266  %4 = and <8 x i16> %2, %3
267  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
268  %6 = and <8 x i16> %4, %5
269  %7 = extractelement <8 x i16> %6, i32 0
270  %8 = sext i16 %7 to i32
271  ret i32 %8
272}
273
; "All lanes true" reduction over a <2 x i64> signed greater-than compare.
; Each i1 lane is sign-extended to i64 (0 or -1), the two lanes are ANDed
; together and lane 0 is returned: -1 when %a0 > %a1 (signed) in every lane,
; else 0.
; All runs are expected to use PCMPGTQ, MOVMSKPD, a compare against 3, and
; sete/negq.
274define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
275; SSE-LABEL: test_v2i64_sext:
276; SSE:       # %bb.0:
277; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
278; SSE-NEXT:    movmskpd %xmm0, %ecx
279; SSE-NEXT:    xorl %eax, %eax
280; SSE-NEXT:    cmpl $3, %ecx
281; SSE-NEXT:    sete %al
282; SSE-NEXT:    negq %rax
283; SSE-NEXT:    retq
284;
285; AVX-LABEL: test_v2i64_sext:
286; AVX:       # %bb.0:
287; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
288; AVX-NEXT:    vmovmskpd %xmm0, %ecx
289; AVX-NEXT:    xorl %eax, %eax
290; AVX-NEXT:    cmpl $3, %ecx
291; AVX-NEXT:    sete %al
292; AVX-NEXT:    negq %rax
293; AVX-NEXT:    retq
294;
295; AVX512-LABEL: test_v2i64_sext:
296; AVX512:       # %bb.0:
297; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
298; AVX512-NEXT:    vmovmskpd %xmm0, %ecx
299; AVX512-NEXT:    xorl %eax, %eax
300; AVX512-NEXT:    cmpl $3, %ecx
301; AVX512-NEXT:    sete %al
302; AVX512-NEXT:    negq %rax
303; AVX512-NEXT:    retq
; Single shuffle+and step: lane 1 is moved onto lane 0 and ANDed with it.
304  %c = icmp sgt <2 x i64> %a0, %a1
305  %s = sext <2 x i1> %c to <2 x i64>
306  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
307  %2 = and <2 x i64> %s, %1
308  %3 = extractelement <2 x i64> %2, i32 0
309  ret i64 %3
310}
311
; "All lanes true" reduction over a <4 x i64> signed greater-than compare.
; The i1 mask is sign-extended to <4 x i64> and folded with two shuffle+and
; steps; lane 0 is returned, giving -1 when every lane compares true and 0
; otherwise.
; Expected lowering differs per run: SSE4.2 ANDs the two 128-bit compare
; results before MOVMSKPD (compare against 3); AVX1 compares the two 128-bit
; halves separately and reassembles a 256-bit mask; AVX2 and AVX512 compare
; in a single 256-bit PCMPGTQ. The 256-bit paths compare the mask against 15.
312define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
313; SSE-LABEL: test_v4i64_sext:
314; SSE:       # %bb.0:
315; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
316; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
317; SSE-NEXT:    pand %xmm1, %xmm0
318; SSE-NEXT:    movmskpd %xmm0, %ecx
319; SSE-NEXT:    xorl %eax, %eax
320; SSE-NEXT:    cmpl $3, %ecx
321; SSE-NEXT:    sete %al
322; SSE-NEXT:    negq %rax
323; SSE-NEXT:    retq
324;
325; AVX1-LABEL: test_v4i64_sext:
326; AVX1:       # %bb.0:
327; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
328; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
329; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
330; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
331; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
332; AVX1-NEXT:    vmovmskpd %ymm0, %ecx
333; AVX1-NEXT:    xorl %eax, %eax
334; AVX1-NEXT:    cmpl $15, %ecx
335; AVX1-NEXT:    sete %al
336; AVX1-NEXT:    negq %rax
337; AVX1-NEXT:    vzeroupper
338; AVX1-NEXT:    retq
339;
340; AVX2-LABEL: test_v4i64_sext:
341; AVX2:       # %bb.0:
342; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
343; AVX2-NEXT:    vmovmskpd %ymm0, %ecx
344; AVX2-NEXT:    xorl %eax, %eax
345; AVX2-NEXT:    cmpl $15, %ecx
346; AVX2-NEXT:    sete %al
347; AVX2-NEXT:    negq %rax
348; AVX2-NEXT:    vzeroupper
349; AVX2-NEXT:    retq
350;
351; AVX512-LABEL: test_v4i64_sext:
352; AVX512:       # %bb.0:
353; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
354; AVX512-NEXT:    vmovmskpd %ymm0, %ecx
355; AVX512-NEXT:    xorl %eax, %eax
356; AVX512-NEXT:    cmpl $15, %ecx
357; AVX512-NEXT:    sete %al
358; AVX512-NEXT:    negq %rax
359; AVX512-NEXT:    vzeroupper
360; AVX512-NEXT:    retq
; Two-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
361  %c = icmp sgt <4 x i64> %a0, %a1
362  %s = sext <4 x i1> %c to <4 x i64>
363  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
364  %2 = and <4 x i64> %s, %1
365  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
366  %4 = and <4 x i64> %2, %3
367  %5 = extractelement <4 x i64> %4, i64 0
368  ret i64 %5
369}
370
; Variant of the <4 x i64> "all lanes true" reduction in which the compare
; mask is sign-extended to the narrower <4 x i32> (a 128-bit vector) instead
; of <4 x i64>. The reduction runs on i32 lanes and the extracted i32 result
; is sign-extended to the i64 return value (-1 if all lanes true, else 0).
; The SSE4.2, AVX1 and AVX2 runs are expected to narrow the mask with
; PACKSSDW; the AVX512 run is expected to compare into a mask register and
; expand it with a zero-masked move. All runs then use MOVMSKPS and compare
; against 15.
371define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
372; SSE-LABEL: test_v4i64_legal_sext:
373; SSE:       # %bb.0:
374; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
375; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
376; SSE-NEXT:    packssdw %xmm1, %xmm0
377; SSE-NEXT:    movmskps %xmm0, %ecx
378; SSE-NEXT:    xorl %eax, %eax
379; SSE-NEXT:    cmpl $15, %ecx
380; SSE-NEXT:    sete %al
381; SSE-NEXT:    negq %rax
382; SSE-NEXT:    retq
383;
384; AVX1-LABEL: test_v4i64_legal_sext:
385; AVX1:       # %bb.0:
386; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
387; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
388; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
389; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
390; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
391; AVX1-NEXT:    vmovmskps %xmm0, %ecx
392; AVX1-NEXT:    xorl %eax, %eax
393; AVX1-NEXT:    cmpl $15, %ecx
394; AVX1-NEXT:    sete %al
395; AVX1-NEXT:    negq %rax
396; AVX1-NEXT:    vzeroupper
397; AVX1-NEXT:    retq
398;
399; AVX2-LABEL: test_v4i64_legal_sext:
400; AVX2:       # %bb.0:
401; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
402; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
403; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
404; AVX2-NEXT:    vmovmskps %xmm0, %ecx
405; AVX2-NEXT:    xorl %eax, %eax
406; AVX2-NEXT:    cmpl $15, %ecx
407; AVX2-NEXT:    sete %al
408; AVX2-NEXT:    negq %rax
409; AVX2-NEXT:    vzeroupper
410; AVX2-NEXT:    retq
411;
412; AVX512-LABEL: test_v4i64_legal_sext:
413; AVX512:       # %bb.0:
414; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
415; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
416; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
417; AVX512-NEXT:    vmovmskps %xmm0, %ecx
418; AVX512-NEXT:    xorl %eax, %eax
419; AVX512-NEXT:    cmpl $15, %ecx
420; AVX512-NEXT:    sete %al
421; AVX512-NEXT:    negq %rax
422; AVX512-NEXT:    vzeroupper
423; AVX512-NEXT:    retq
; Mask is narrowed to i32 lanes before the shuffle+and tree; the scalar
; result is widened back to i64 by the trailing sext.
424  %c = icmp sgt <4 x i64> %a0, %a1
425  %s = sext <4 x i1> %c to <4 x i32>
426  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
427  %2 = and <4 x i32> %s, %1
428  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
429  %4 = and <4 x i32> %2, %3
430  %5 = extractelement <4 x i32> %4, i64 0
431  %6 = sext i32 %5 to i64
432  ret i64 %6
433}
434
; "All lanes true" reduction over a <4 x i32> signed greater-than compare.
; The i1 mask is sign-extended to <4 x i32>, folded with two shuffle+and
; steps, and lane 0 is returned: -1 when every lane compares true, else 0.
; All runs are expected to use PCMPGTD, MOVMSKPS, a compare against 15, and
; sete/negl.
435define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
436; SSE-LABEL: test_v4i32_sext:
437; SSE:       # %bb.0:
438; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
439; SSE-NEXT:    movmskps %xmm0, %ecx
440; SSE-NEXT:    xorl %eax, %eax
441; SSE-NEXT:    cmpl $15, %ecx
442; SSE-NEXT:    sete %al
443; SSE-NEXT:    negl %eax
444; SSE-NEXT:    retq
445;
446; AVX-LABEL: test_v4i32_sext:
447; AVX:       # %bb.0:
448; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
449; AVX-NEXT:    vmovmskps %xmm0, %ecx
450; AVX-NEXT:    xorl %eax, %eax
451; AVX-NEXT:    cmpl $15, %ecx
452; AVX-NEXT:    sete %al
453; AVX-NEXT:    negl %eax
454; AVX-NEXT:    retq
455;
456; AVX512-LABEL: test_v4i32_sext:
457; AVX512:       # %bb.0:
458; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
459; AVX512-NEXT:    vmovmskps %xmm0, %ecx
460; AVX512-NEXT:    xorl %eax, %eax
461; AVX512-NEXT:    cmpl $15, %ecx
462; AVX512-NEXT:    sete %al
463; AVX512-NEXT:    negl %eax
464; AVX512-NEXT:    retq
; Two-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
465  %c = icmp sgt <4 x i32> %a0, %a1
466  %s = sext <4 x i1> %c to <4 x i32>
467  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
468  %2 = and <4 x i32> %s, %1
469  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
470  %4 = and <4 x i32> %2, %3
471  %5 = extractelement <4 x i32> %4, i32 0
472  ret i32 %5
473}
474
; "All lanes true" reduction over an <8 x i32> signed greater-than compare.
; The i1 mask is sign-extended to <8 x i32> and folded with three shuffle+and
; steps (4 lanes, 2 lanes, 1 lane); lane 0 is returned, giving -1 when every
; lane compares true and 0 otherwise.
; Expected lowering differs per run: SSE4.2 ANDs the two 128-bit compare
; results before MOVMSKPS (compare against 15); AVX1 compares the two 128-bit
; halves separately and reassembles a 256-bit mask; AVX2 and AVX512 compare
; in a single 256-bit PCMPGTD. The 256-bit paths compare the mask against 255.
475define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
476; SSE-LABEL: test_v8i32_sext:
477; SSE:       # %bb.0:
478; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
479; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
480; SSE-NEXT:    pand %xmm1, %xmm0
481; SSE-NEXT:    movmskps %xmm0, %ecx
482; SSE-NEXT:    xorl %eax, %eax
483; SSE-NEXT:    cmpl $15, %ecx
484; SSE-NEXT:    sete %al
485; SSE-NEXT:    negl %eax
486; SSE-NEXT:    retq
487;
488; AVX1-LABEL: test_v8i32_sext:
489; AVX1:       # %bb.0:
490; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
491; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
492; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
493; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
494; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
495; AVX1-NEXT:    vmovmskps %ymm0, %ecx
496; AVX1-NEXT:    xorl %eax, %eax
497; AVX1-NEXT:    cmpl $255, %ecx
498; AVX1-NEXT:    sete %al
499; AVX1-NEXT:    negl %eax
500; AVX1-NEXT:    vzeroupper
501; AVX1-NEXT:    retq
502;
503; AVX2-LABEL: test_v8i32_sext:
504; AVX2:       # %bb.0:
505; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
506; AVX2-NEXT:    vmovmskps %ymm0, %ecx
507; AVX2-NEXT:    xorl %eax, %eax
508; AVX2-NEXT:    cmpl $255, %ecx
509; AVX2-NEXT:    sete %al
510; AVX2-NEXT:    negl %eax
511; AVX2-NEXT:    vzeroupper
512; AVX2-NEXT:    retq
513;
514; AVX512-LABEL: test_v8i32_sext:
515; AVX512:       # %bb.0:
516; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
517; AVX512-NEXT:    vmovmskps %ymm0, %ecx
518; AVX512-NEXT:    xorl %eax, %eax
519; AVX512-NEXT:    cmpl $255, %ecx
520; AVX512-NEXT:    sete %al
521; AVX512-NEXT:    negl %eax
522; AVX512-NEXT:    vzeroupper
523; AVX512-NEXT:    retq
; Three-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
524  %c = icmp sgt <8 x i32> %a0, %a1
525  %s = sext <8 x i1> %c to <8 x i32>
526  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
527  %2 = and <8 x i32> %s, %1
528  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
529  %4 = and <8 x i32> %2, %3
530  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
531  %6 = and <8 x i32> %4, %5
532  %7 = extractelement <8 x i32> %6, i32 0
533  ret i32 %7
534}
535
; Variant of the <8 x i32> "all lanes true" reduction in which the compare
; mask is sign-extended to the narrower <8 x i16> (a 128-bit vector) instead
; of <8 x i32>. The reduction runs on i16 lanes and the extracted i16 result
; is sign-extended to the i32 return value (-1 if all lanes true, else 0).
; The SSE4.2, AVX1 and AVX2 runs are expected to narrow the mask with
; PACKSSDW; the AVX512 run is expected to compare into a mask register and
; expand it with vpmovm2w. All runs then use PMOVMSKB and compare against
; 0xFFFF.
536define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
537; SSE-LABEL: test_v8i32_legal_sext:
538; SSE:       # %bb.0:
539; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
540; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
541; SSE-NEXT:    packssdw %xmm1, %xmm0
542; SSE-NEXT:    pmovmskb %xmm0, %ecx
543; SSE-NEXT:    xorl %eax, %eax
544; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
545; SSE-NEXT:    sete %al
546; SSE-NEXT:    negl %eax
547; SSE-NEXT:    retq
548;
549; AVX1-LABEL: test_v8i32_legal_sext:
550; AVX1:       # %bb.0:
551; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
552; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
553; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
554; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
555; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
556; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
557; AVX1-NEXT:    xorl %eax, %eax
558; AVX1-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
559; AVX1-NEXT:    sete %al
560; AVX1-NEXT:    negl %eax
561; AVX1-NEXT:    vzeroupper
562; AVX1-NEXT:    retq
563;
564; AVX2-LABEL: test_v8i32_legal_sext:
565; AVX2:       # %bb.0:
566; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
567; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
568; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
569; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
570; AVX2-NEXT:    xorl %eax, %eax
571; AVX2-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
572; AVX2-NEXT:    sete %al
573; AVX2-NEXT:    negl %eax
574; AVX2-NEXT:    vzeroupper
575; AVX2-NEXT:    retq
576;
577; AVX512-LABEL: test_v8i32_legal_sext:
578; AVX512:       # %bb.0:
579; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
580; AVX512-NEXT:    vpmovm2w %k0, %xmm0
581; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
582; AVX512-NEXT:    xorl %eax, %eax
583; AVX512-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
584; AVX512-NEXT:    sete %al
585; AVX512-NEXT:    negl %eax
586; AVX512-NEXT:    vzeroupper
587; AVX512-NEXT:    retq
; Mask is narrowed to i16 lanes before the shuffle+and tree; the scalar
; result is widened back to i32 by the trailing sext.
588  %c = icmp sgt <8 x i32> %a0, %a1
589  %s = sext <8 x i1> %c to <8 x i16>
590  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
591  %2 = and <8 x i16> %s, %1
592  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
593  %4 = and <8 x i16> %2, %3
594  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
595  %6 = and <8 x i16> %4, %5
596  %7 = extractelement <8 x i16> %6, i32 0
597  %8 = sext i16 %7 to i32
598  ret i32 %8
599}
600
; "All lanes true" reduction over an <8 x i16> signed greater-than compare.
; The i1 mask is sign-extended to <8 x i16>, folded with three shuffle+and
; steps, and lane 0 is returned: -1 when every lane compares true, else 0.
; All runs are expected to use PCMPGTW, a byte-granular PMOVMSKB, a compare
; against 0xFFFF, and sete/negl, with the i16 result taken from the low half
; of eax.
601define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
602; SSE-LABEL: test_v8i16_sext:
603; SSE:       # %bb.0:
604; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
605; SSE-NEXT:    pmovmskb %xmm0, %ecx
606; SSE-NEXT:    xorl %eax, %eax
607; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
608; SSE-NEXT:    sete %al
609; SSE-NEXT:    negl %eax
610; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
611; SSE-NEXT:    retq
612;
613; AVX-LABEL: test_v8i16_sext:
614; AVX:       # %bb.0:
615; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
616; AVX-NEXT:    vpmovmskb %xmm0, %ecx
617; AVX-NEXT:    xorl %eax, %eax
618; AVX-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
619; AVX-NEXT:    sete %al
620; AVX-NEXT:    negl %eax
621; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
622; AVX-NEXT:    retq
623;
624; AVX512-LABEL: test_v8i16_sext:
625; AVX512:       # %bb.0:
626; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
627; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
628; AVX512-NEXT:    xorl %eax, %eax
629; AVX512-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
630; AVX512-NEXT:    sete %al
631; AVX512-NEXT:    negl %eax
632; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
633; AVX512-NEXT:    retq
; Three-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
634  %c = icmp sgt <8 x i16> %a0, %a1
635  %s = sext <8 x i1> %c to <8 x i16>
636  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
637  %2 = and <8 x i16> %s, %1
638  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
639  %4 = and <8 x i16> %2, %3
640  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
641  %6 = and <8 x i16> %4, %5
642  %7 = extractelement <8 x i16> %6, i32 0
643  ret i16 %7
644}
645
; "All lanes true" reduction over a <16 x i16> signed greater-than compare.
; The i1 mask is sign-extended to <16 x i16> and folded with four shuffle+and
; steps (8, 4, 2, 1 lanes); lane 0 is returned, giving -1 when every lane
; compares true and 0 otherwise.
; Expected lowering differs per run: SSE4.2 and AVX1 AND the two 128-bit
; compare results and test a 128-bit PMOVMSKB against 0xFFFF; AVX2 and AVX512
; use a 256-bit PMOVMSKB and compare the 32-bit mask against -1.
646define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
647; SSE-LABEL: test_v16i16_sext:
648; SSE:       # %bb.0:
649; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
650; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
651; SSE-NEXT:    pand %xmm1, %xmm0
652; SSE-NEXT:    pmovmskb %xmm0, %ecx
653; SSE-NEXT:    xorl %eax, %eax
654; SSE-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
655; SSE-NEXT:    sete %al
656; SSE-NEXT:    negl %eax
657; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
658; SSE-NEXT:    retq
659;
660; AVX1-LABEL: test_v16i16_sext:
661; AVX1:       # %bb.0:
662; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
663; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
664; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
665; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
666; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
667; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
668; AVX1-NEXT:    xorl %eax, %eax
669; AVX1-NEXT:    cmpl $65535, %ecx # imm = 0xFFFF
670; AVX1-NEXT:    sete %al
671; AVX1-NEXT:    negl %eax
672; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
673; AVX1-NEXT:    vzeroupper
674; AVX1-NEXT:    retq
675;
676; AVX2-LABEL: test_v16i16_sext:
677; AVX2:       # %bb.0:
678; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
679; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
680; AVX2-NEXT:    xorl %eax, %eax
681; AVX2-NEXT:    cmpl $-1, %ecx
682; AVX2-NEXT:    sete %al
683; AVX2-NEXT:    negl %eax
684; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
685; AVX2-NEXT:    vzeroupper
686; AVX2-NEXT:    retq
687;
688; AVX512-LABEL: test_v16i16_sext:
689; AVX512:       # %bb.0:
690; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
691; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
692; AVX512-NEXT:    xorl %eax, %eax
693; AVX512-NEXT:    cmpl $-1, %ecx
694; AVX512-NEXT:    sete %al
695; AVX512-NEXT:    negl %eax
696; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
697; AVX512-NEXT:    vzeroupper
698; AVX512-NEXT:    retq
; Four-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
699  %c = icmp sgt <16 x i16> %a0, %a1
700  %s = sext <16 x i1> %c to <16 x i16>
701  %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
702  %2 = and <16 x i16> %s, %1
703  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
704  %4 = and <16 x i16> %2, %3
705  %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
706  %6 = and <16 x i16> %4, %5
707  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
708  %8 = and <16 x i16> %6, %7
709  %9 = extractelement <16 x i16> %8, i32 0
710  ret i16 %9
711}
712
; Variant of the <16 x i16> "all lanes true" reduction in which the compare
; mask is sign-extended to the narrower <16 x i8> (a 128-bit vector) instead
; of <16 x i16>. The reduction runs on i8 lanes and the extracted i8 result
; is sign-extended to the i16 return value (-1 if all lanes true, else 0).
; The SSE4.2 and AVX1 runs are expected to narrow the mask with PACKSSWB, the
; AVX2 run to test a 256-bit PMOVMSKB against -1, and the AVX512 run to
; expand a mask register with vpmovm2b. Every run ends with
; sete/negb/movsbl to produce the sign-extended result.
713define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
714; SSE-LABEL: test_v16i16_legal_sext:
715; SSE:       # %bb.0:
716; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
717; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
718; SSE-NEXT:    packsswb %xmm1, %xmm0
719; SSE-NEXT:    pmovmskb %xmm0, %eax
720; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
721; SSE-NEXT:    sete %al
722; SSE-NEXT:    negb %al
723; SSE-NEXT:    movsbl %al, %eax
724; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
725; SSE-NEXT:    retq
726;
727; AVX1-LABEL: test_v16i16_legal_sext:
728; AVX1:       # %bb.0:
729; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
730; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
731; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
732; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
733; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
734; AVX1-NEXT:    vpmovmskb %xmm0, %eax
735; AVX1-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
736; AVX1-NEXT:    sete %al
737; AVX1-NEXT:    negb %al
738; AVX1-NEXT:    movsbl %al, %eax
739; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
740; AVX1-NEXT:    vzeroupper
741; AVX1-NEXT:    retq
742;
743; AVX2-LABEL: test_v16i16_legal_sext:
744; AVX2:       # %bb.0:
745; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
746; AVX2-NEXT:    vpmovmskb %ymm0, %eax
747; AVX2-NEXT:    cmpl $-1, %eax
748; AVX2-NEXT:    sete %al
749; AVX2-NEXT:    negb %al
750; AVX2-NEXT:    movsbl %al, %eax
751; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
752; AVX2-NEXT:    vzeroupper
753; AVX2-NEXT:    retq
754;
755; AVX512-LABEL: test_v16i16_legal_sext:
756; AVX512:       # %bb.0:
757; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
758; AVX512-NEXT:    vpmovm2b %k0, %xmm0
759; AVX512-NEXT:    vpmovmskb %xmm0, %eax
760; AVX512-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
761; AVX512-NEXT:    sete %al
762; AVX512-NEXT:    negb %al
763; AVX512-NEXT:    movsbl %al, %eax
764; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
765; AVX512-NEXT:    vzeroupper
766; AVX512-NEXT:    retq
; Mask is narrowed to i8 lanes before the four-step shuffle+and tree; the
; scalar result is widened back to i16 by the trailing sext.
767  %c  = icmp sgt <16 x i16> %a0, %a1
768  %s  = sext <16 x i1> %c to <16 x i8>
769  %1  = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
770  %2  = and <16 x i8> %s, %1
771  %3  = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
772  %4  = and <16 x i8> %2, %3
773  %5  = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
774  %6  = and <16 x i8> %4, %5
775  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
776  %8  = and <16 x i8> %6, %7
777  %9  = extractelement <16 x i8> %8, i32 0
778  %10 = sext i8 %9 to i16
779  ret i16 %10
780}
781
; "All lanes true" reduction over a <16 x i8> signed greater-than compare.
; The i1 mask is sign-extended to <16 x i8>, folded with four shuffle+and
; steps (8, 4, 2, 1 lanes), and lane 0 is returned: -1 when every lane
; compares true, else 0.
; All runs are expected to use PCMPGTB, PMOVMSKB, a compare against 0xFFFF,
; and sete/negb on the 8-bit result register.
782define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
783; SSE-LABEL: test_v16i8_sext:
784; SSE:       # %bb.0:
785; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
786; SSE-NEXT:    pmovmskb %xmm0, %eax
787; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
788; SSE-NEXT:    sete %al
789; SSE-NEXT:    negb %al
790; SSE-NEXT:    retq
791;
792; AVX-LABEL: test_v16i8_sext:
793; AVX:       # %bb.0:
794; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
795; AVX-NEXT:    vpmovmskb %xmm0, %eax
796; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
797; AVX-NEXT:    sete %al
798; AVX-NEXT:    negb %al
799; AVX-NEXT:    retq
800;
801; AVX512-LABEL: test_v16i8_sext:
802; AVX512:       # %bb.0:
803; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
804; AVX512-NEXT:    vpmovmskb %xmm0, %eax
805; AVX512-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
806; AVX512-NEXT:    sete %al
807; AVX512-NEXT:    negb %al
808; AVX512-NEXT:    retq
; Four-step shuffle+and tree; after the last and, lane 0 is the AND of all lanes.
809  %c = icmp sgt <16 x i8> %a0, %a1
810  %s = sext <16 x i1> %c to <16 x i8>
811  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
812  %2 = and <16 x i8> %s, %1
813  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
814  %4 = and <16 x i8> %2, %3
815  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
816  %6 = and <16 x i8> %4, %5
817  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
818  %8 = and <16 x i8> %6, %7
819  %9 = extractelement <16 x i8> %8, i32 0
820  ret i8 %9
821}
822
823define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
824; SSE-LABEL: test_v32i8_sext:
825; SSE:       # %bb.0:
826; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
827; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
828; SSE-NEXT:    pand %xmm1, %xmm0
829; SSE-NEXT:    pmovmskb %xmm0, %eax
830; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
831; SSE-NEXT:    sete %al
832; SSE-NEXT:    negb %al
833; SSE-NEXT:    retq
834;
835; AVX1-LABEL: test_v32i8_sext:
836; AVX1:       # %bb.0:
837; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
838; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
839; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
840; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
841; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
842; AVX1-NEXT:    vpmovmskb %xmm0, %eax
843; AVX1-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
844; AVX1-NEXT:    sete %al
845; AVX1-NEXT:    negb %al
846; AVX1-NEXT:    vzeroupper
847; AVX1-NEXT:    retq
848;
849; AVX2-LABEL: test_v32i8_sext:
850; AVX2:       # %bb.0:
851; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
852; AVX2-NEXT:    vpmovmskb %ymm0, %eax
853; AVX2-NEXT:    cmpl $-1, %eax
854; AVX2-NEXT:    sete %al
855; AVX2-NEXT:    negb %al
856; AVX2-NEXT:    vzeroupper
857; AVX2-NEXT:    retq
858;
859; AVX512-LABEL: test_v32i8_sext:
860; AVX512:       # %bb.0:
861; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
862; AVX512-NEXT:    vpmovmskb %ymm0, %eax
863; AVX512-NEXT:    cmpl $-1, %eax
864; AVX512-NEXT:    sete %al
865; AVX512-NEXT:    negb %al
866; AVX512-NEXT:    vzeroupper
867; AVX512-NEXT:    retq
868  %c  = icmp sgt <32 x i8> %a0, %a1
869  %s  = sext <32 x i1> %c to <32 x i8>
870  %1  = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
871  %2  = and <32 x i8> %s, %1
872  %3  = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
873  %4  = and <32 x i8> %2, %3
874  %5  = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
875  %6  = and <32 x i8> %4, %5
876  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
877  %8  = and <32 x i8> %6, %7
878  %9  = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
879  %10 = and <32 x i8> %8, %9
880  %11 = extractelement <32 x i8> %10, i32 0
881  ret i8 %11
882}
883
; All-of reduction performed directly on the <2 x i1> compare result (no sext):
; returns true only if both lanes satisfy the ordered %x > %y compare.
884define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
885; SSE-LABEL: bool_reduction_v2f64:
886; SSE:       # %bb.0:
887; SSE-NEXT:    cmpltpd %xmm0, %xmm1
888; SSE-NEXT:    movmskpd %xmm1, %eax
889; SSE-NEXT:    cmpb $3, %al
890; SSE-NEXT:    sete %al
891; SSE-NEXT:    retq
892;
893; AVX-LABEL: bool_reduction_v2f64:
894; AVX:       # %bb.0:
895; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
896; AVX-NEXT:    vmovmskpd %xmm0, %eax
897; AVX-NEXT:    cmpb $3, %al
898; AVX-NEXT:    sete %al
899; AVX-NEXT:    retq
900;
901; AVX512-LABEL: bool_reduction_v2f64:
902; AVX512:       # %bb.0:
903; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
904; AVX512-NEXT:    kmovd %k0, %eax
905; AVX512-NEXT:    cmpb $3, %al
906; AVX512-NEXT:    sete %al
907; AVX512-NEXT:    retq
  ; Ordered greater-than per lane.
908  %a = fcmp ogt <2 x double> %x, %y
  ; Bring lane 1 down to lane 0 and AND it with the original lane 0.
909  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
910  %c = and <2 x i1> %a, %b
911  %d = extractelement <2 x i1> %c, i32 0
912  ret i1 %d
913}
914
; All-of reduction on a <4 x i1> compare result: returns true only if all four
; lanes satisfy the ordered-equal compare of %x and %y.
915define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
916; SSE-LABEL: bool_reduction_v4f32:
917; SSE:       # %bb.0:
918; SSE-NEXT:    cmpeqps %xmm1, %xmm0
919; SSE-NEXT:    movmskps %xmm0, %eax
920; SSE-NEXT:    cmpb $15, %al
921; SSE-NEXT:    sete %al
922; SSE-NEXT:    retq
923;
924; AVX-LABEL: bool_reduction_v4f32:
925; AVX:       # %bb.0:
926; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
927; AVX-NEXT:    vmovmskps %xmm0, %eax
928; AVX-NEXT:    cmpb $15, %al
929; AVX-NEXT:    sete %al
930; AVX-NEXT:    retq
931;
932; AVX512-LABEL: bool_reduction_v4f32:
933; AVX512:       # %bb.0:
934; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
935; AVX512-NEXT:    kmovd %k0, %eax
936; AVX512-NEXT:    cmpb $15, %al
937; AVX512-NEXT:    sete %al
938; AVX512-NEXT:    retq
  ; Ordered equality per lane.
939  %a = fcmp oeq <4 x float> %x, %y
  ; Fold lanes 2-3 onto lanes 0-1, then lane 1 onto lane 0.
940  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
941  %b = and <4 x i1> %s1, %a
942  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
943  %c = and <4 x i1> %s2, %b
944  %d = extractelement <4 x i1> %c, i32 0
945  ret i1 %d
946}
947
; All-of reduction on a <4 x i1> compare result from 256-bit inputs: returns
; true only if all four lanes satisfy the ordered %x >= %y compare. The
; expected 128-bit lowering packs the two compare halves into one 4-lane mask.
948define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
949; SSE-LABEL: bool_reduction_v4f64:
950; SSE:       # %bb.0:
951; SSE-NEXT:    cmplepd %xmm1, %xmm3
952; SSE-NEXT:    cmplepd %xmm0, %xmm2
953; SSE-NEXT:    packssdw %xmm3, %xmm2
954; SSE-NEXT:    movmskps %xmm2, %eax
955; SSE-NEXT:    cmpb $15, %al
956; SSE-NEXT:    sete %al
957; SSE-NEXT:    retq
958;
959; AVX-LABEL: bool_reduction_v4f64:
960; AVX:       # %bb.0:
961; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
962; AVX-NEXT:    vmovmskpd %ymm0, %eax
963; AVX-NEXT:    cmpb $15, %al
964; AVX-NEXT:    sete %al
965; AVX-NEXT:    vzeroupper
966; AVX-NEXT:    retq
967;
968; AVX512-LABEL: bool_reduction_v4f64:
969; AVX512:       # %bb.0:
970; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %k0
971; AVX512-NEXT:    kmovd %k0, %eax
972; AVX512-NEXT:    cmpb $15, %al
973; AVX512-NEXT:    sete %al
974; AVX512-NEXT:    vzeroupper
975; AVX512-NEXT:    retq
  ; Ordered greater-or-equal per lane.
976  %a = fcmp oge <4 x double> %x, %y
  ; Fold lanes 2-3 onto lanes 0-1, then lane 1 onto lane 0.
977  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
978  %b = and <4 x i1> %s1, %a
979  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
980  %c = and <4 x i1> %s2, %b
981  %d = extractelement <4 x i1> %c, i32 0
982  ret i1 %d
983}
984
; All-of reduction on an <8 x i1> compare result: returns true only if all
; eight lanes satisfy the unordered-or-not-equal compare of %x and %y.
985define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
986; SSE-LABEL: bool_reduction_v8f32:
987; SSE:       # %bb.0:
988; SSE-NEXT:    cmpneqps %xmm3, %xmm1
989; SSE-NEXT:    cmpneqps %xmm2, %xmm0
990; SSE-NEXT:    packssdw %xmm1, %xmm0
991; SSE-NEXT:    packsswb %xmm0, %xmm0
992; SSE-NEXT:    pmovmskb %xmm0, %eax
993; SSE-NEXT:    cmpb $-1, %al
994; SSE-NEXT:    sete %al
995; SSE-NEXT:    retq
996;
997; AVX-LABEL: bool_reduction_v8f32:
998; AVX:       # %bb.0:
999; AVX-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm0
1000; AVX-NEXT:    vmovmskps %ymm0, %eax
1001; AVX-NEXT:    cmpb $-1, %al
1002; AVX-NEXT:    sete %al
1003; AVX-NEXT:    vzeroupper
1004; AVX-NEXT:    retq
1005;
1006; AVX512-LABEL: bool_reduction_v8f32:
1007; AVX512:       # %bb.0:
1008; AVX512-NEXT:    vcmpneqps %ymm1, %ymm0, %k0
1009; AVX512-NEXT:    kmovd %k0, %eax
1010; AVX512-NEXT:    cmpb $-1, %al
1011; AVX512-NEXT:    sete %al
1012; AVX512-NEXT:    vzeroupper
1013; AVX512-NEXT:    retq
  ; Unordered-or-not-equal per lane.
1014  %a = fcmp une <8 x float> %x, %y
  ; Fold lanes 4-7, then 2-3, then 1 onto the low lanes, ANDing at each step.
1015  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1016  %b = and <8 x i1> %s1, %a
1017  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1018  %c = and <8 x i1> %s2, %b
1019  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1020  %d = and <8 x i1> %s3, %c
1021  %e = extractelement <8 x i1> %d, i32 0
1022  ret i1 %e
1023}
1024
; All-of reduction on a <2 x i1> unsigned compare: returns true only if
; %x > %y (unsigned) in both lanes. The vector-register checks xor both
; operands with the sign-bit constant before a signed pcmpgtq; the
; mask-register checks use vpcmpnleuq directly.
1025define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
1026; SSE-LABEL: bool_reduction_v2i64:
1027; SSE:       # %bb.0:
1028; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1029; SSE-NEXT:    pxor %xmm2, %xmm1
1030; SSE-NEXT:    pxor %xmm2, %xmm0
1031; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
1032; SSE-NEXT:    movmskpd %xmm0, %eax
1033; SSE-NEXT:    cmpb $3, %al
1034; SSE-NEXT:    sete %al
1035; SSE-NEXT:    retq
1036;
1037; AVX-LABEL: bool_reduction_v2i64:
1038; AVX:       # %bb.0:
1039; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1040; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
1041; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
1042; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
1043; AVX-NEXT:    vmovmskpd %xmm0, %eax
1044; AVX-NEXT:    cmpb $3, %al
1045; AVX-NEXT:    sete %al
1046; AVX-NEXT:    retq
1047;
1048; AVX512-LABEL: bool_reduction_v2i64:
1049; AVX512:       # %bb.0:
1050; AVX512-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
1051; AVX512-NEXT:    kmovd %k0, %eax
1052; AVX512-NEXT:    cmpb $3, %al
1053; AVX512-NEXT:    sete %al
1054; AVX512-NEXT:    retq
  ; Unsigned greater-than per lane.
1055  %a = icmp ugt <2 x i64> %x, %y
  ; Bring lane 1 down to lane 0 and AND it with the original lane 0.
1056  %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
1057  %c = and <2 x i1> %a, %b
1058  %d = extractelement <2 x i1> %c, i32 0
1059  ret i1 %d
1060}
1061
; All-of reduction on a <4 x i1> inequality compare: returns true only if
; %x != %y in all four lanes. The vector-register checks build the equality
; mask and invert it with xor 15 before the all-ones test; the mask-register
; checks compare for inequality directly.
1062define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
1063; SSE-LABEL: bool_reduction_v4i32:
1064; SSE:       # %bb.0:
1065; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
1066; SSE-NEXT:    movmskps %xmm0, %eax
1067; SSE-NEXT:    xorl $15, %eax
1068; SSE-NEXT:    cmpb $15, %al
1069; SSE-NEXT:    sete %al
1070; SSE-NEXT:    retq
1071;
1072; AVX-LABEL: bool_reduction_v4i32:
1073; AVX:       # %bb.0:
1074; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1075; AVX-NEXT:    vmovmskps %xmm0, %eax
1076; AVX-NEXT:    xorl $15, %eax
1077; AVX-NEXT:    cmpb $15, %al
1078; AVX-NEXT:    sete %al
1079; AVX-NEXT:    retq
1080;
1081; AVX512-LABEL: bool_reduction_v4i32:
1082; AVX512:       # %bb.0:
1083; AVX512-NEXT:    vpcmpneqd %xmm1, %xmm0, %k0
1084; AVX512-NEXT:    kmovd %k0, %eax
1085; AVX512-NEXT:    cmpb $15, %al
1086; AVX512-NEXT:    sete %al
1087; AVX512-NEXT:    retq
  ; Not-equal per lane.
1088  %a = icmp ne <4 x i32> %x, %y
  ; Fold lanes 2-3 onto lanes 0-1, then lane 1 onto lane 0.
1089  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1090  %b = and <4 x i1> %s1, %a
1091  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1092  %c = and <4 x i1> %s2, %b
1093  %d = extractelement <4 x i1> %c, i32 0
1094  ret i1 %d
1095}
1096
; All-of reduction on an <8 x i1> signed compare: returns true only if
; %x < %y (signed) in all eight lanes. The vector-register checks narrow the
; word mask to bytes with a saturating pack before the byte-mask test.
1097define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
1098; SSE-LABEL: bool_reduction_v8i16:
1099; SSE:       # %bb.0:
1100; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
1101; SSE-NEXT:    packsswb %xmm1, %xmm1
1102; SSE-NEXT:    pmovmskb %xmm1, %eax
1103; SSE-NEXT:    cmpb $-1, %al
1104; SSE-NEXT:    sete %al
1105; SSE-NEXT:    retq
1106;
1107; AVX-LABEL: bool_reduction_v8i16:
1108; AVX:       # %bb.0:
1109; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
1110; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1111; AVX-NEXT:    vpmovmskb %xmm0, %eax
1112; AVX-NEXT:    cmpb $-1, %al
1113; AVX-NEXT:    sete %al
1114; AVX-NEXT:    retq
1115;
1116; AVX512-LABEL: bool_reduction_v8i16:
1117; AVX512:       # %bb.0:
1118; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0
1119; AVX512-NEXT:    kmovd %k0, %eax
1120; AVX512-NEXT:    cmpb $-1, %al
1121; AVX512-NEXT:    sete %al
1122; AVX512-NEXT:    retq
  ; Signed less-than per lane.
1123  %a = icmp slt <8 x i16> %x, %y
  ; Fold lanes 4-7, then 2-3, then 1 onto the low lanes, ANDing at each step.
1124  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1125  %b = and <8 x i1> %s1, %a
1126  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1127  %c = and <8 x i1> %s2, %b
1128  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1129  %d = and <8 x i1> %s3, %c
1130  %e = extractelement <8 x i1> %d, i32 0
1131  ret i1 %e
1132}
1133
; All-of reduction on a <16 x i1> signed compare: returns true only if
; %x > %y (signed) in all sixteen lanes. The vector-register checks test the
; 16-bit byte mask against -1; the mask-register checks use kortestw + setb.
1134define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
1135; SSE-LABEL: bool_reduction_v16i8:
1136; SSE:       # %bb.0:
1137; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
1138; SSE-NEXT:    pmovmskb %xmm0, %eax
1139; SSE-NEXT:    cmpw $-1, %ax
1140; SSE-NEXT:    sete %al
1141; SSE-NEXT:    retq
1142;
1143; AVX-LABEL: bool_reduction_v16i8:
1144; AVX:       # %bb.0:
1145; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
1146; AVX-NEXT:    vpmovmskb %xmm0, %eax
1147; AVX-NEXT:    cmpw $-1, %ax
1148; AVX-NEXT:    sete %al
1149; AVX-NEXT:    retq
1150;
1151; AVX512-LABEL: bool_reduction_v16i8:
1152; AVX512:       # %bb.0:
1153; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
1154; AVX512-NEXT:    kortestw %k0, %k0
1155; AVX512-NEXT:    setb %al
1156; AVX512-NEXT:    retq
  ; Signed greater-than per lane.
1157  %a = icmp sgt <16 x i8> %x, %y
  ; Fold lanes 8-15, then 4-7, 2-3 and 1 onto the low lanes, ANDing at each step.
1158  %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1159  %b = and <16 x i1> %s1, %a
1160  %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1161  %c = and <16 x i1> %s2, %b
1162  %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1163  %d = and <16 x i1> %s3, %c
1164  %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1165  %e = and <16 x i1> %s4, %d
1166  %f = extractelement <16 x i1> %e, i32 0
1167  ret i1 %f
1168}
1169
; All-of reduction on a <4 x i1> signed compare from 256-bit inputs: returns
; true only if %x < %y (signed) in all four lanes. The expected 128-bit
; lowerings compare each half separately and then recombine the results.
1170define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
1171; SSE-LABEL: bool_reduction_v4i64:
1172; SSE:       # %bb.0:
1173; SSE-NEXT:    pcmpgtq %xmm1, %xmm3
1174; SSE-NEXT:    pcmpgtq %xmm0, %xmm2
1175; SSE-NEXT:    packssdw %xmm3, %xmm2
1176; SSE-NEXT:    movmskps %xmm2, %eax
1177; SSE-NEXT:    cmpb $15, %al
1178; SSE-NEXT:    sete %al
1179; SSE-NEXT:    retq
1180;
1181; AVX1-LABEL: bool_reduction_v4i64:
1182; AVX1:       # %bb.0:
1183; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1184; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1185; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1186; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
1187; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1188; AVX1-NEXT:    vmovmskpd %ymm0, %eax
1189; AVX1-NEXT:    cmpb $15, %al
1190; AVX1-NEXT:    sete %al
1191; AVX1-NEXT:    vzeroupper
1192; AVX1-NEXT:    retq
1193;
1194; AVX2-LABEL: bool_reduction_v4i64:
1195; AVX2:       # %bb.0:
1196; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
1197; AVX2-NEXT:    vmovmskpd %ymm0, %eax
1198; AVX2-NEXT:    cmpb $15, %al
1199; AVX2-NEXT:    sete %al
1200; AVX2-NEXT:    vzeroupper
1201; AVX2-NEXT:    retq
1202;
1203; AVX512-LABEL: bool_reduction_v4i64:
1204; AVX512:       # %bb.0:
1205; AVX512-NEXT:    vpcmpgtq %ymm0, %ymm1, %k0
1206; AVX512-NEXT:    kmovd %k0, %eax
1207; AVX512-NEXT:    cmpb $15, %al
1208; AVX512-NEXT:    sete %al
1209; AVX512-NEXT:    vzeroupper
1210; AVX512-NEXT:    retq
  ; Signed less-than per lane.
1211  %a = icmp slt <4 x i64> %x, %y
  ; Fold lanes 2-3 onto lanes 0-1, then lane 1 onto lane 0.
1212  %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1213  %b = and <4 x i1> %s1, %a
1214  %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1215  %c = and <4 x i1> %s2, %b
1216  %d = extractelement <4 x i1> %c, i32 0
1217  ret i1 %d
1218}
1219
; All-of reduction on an <8 x i1> unsigned compare: returns true only if
; %x <= %y (unsigned) in all eight lanes. The vector-register checks express
; the compare as an unsigned-min followed by an equality test; the
; mask-register checks use vpcmpleud directly.
1220define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
1221; SSE-LABEL: bool_reduction_v8i32:
1222; SSE:       # %bb.0:
1223; SSE-NEXT:    pminud %xmm1, %xmm3
1224; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
1225; SSE-NEXT:    pminud %xmm0, %xmm2
1226; SSE-NEXT:    pcmpeqd %xmm0, %xmm2
1227; SSE-NEXT:    packssdw %xmm3, %xmm2
1228; SSE-NEXT:    packsswb %xmm2, %xmm2
1229; SSE-NEXT:    pmovmskb %xmm2, %eax
1230; SSE-NEXT:    cmpb $-1, %al
1231; SSE-NEXT:    sete %al
1232; SSE-NEXT:    retq
1233;
1234; AVX1-LABEL: bool_reduction_v8i32:
1235; AVX1:       # %bb.0:
1236; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1237; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1238; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
1239; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
1240; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm1
1241; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
1242; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1243; AVX1-NEXT:    vmovmskps %ymm0, %eax
1244; AVX1-NEXT:    cmpb $-1, %al
1245; AVX1-NEXT:    sete %al
1246; AVX1-NEXT:    vzeroupper
1247; AVX1-NEXT:    retq
1248;
1249; AVX2-LABEL: bool_reduction_v8i32:
1250; AVX2:       # %bb.0:
1251; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm1
1252; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
1253; AVX2-NEXT:    vmovmskps %ymm0, %eax
1254; AVX2-NEXT:    cmpb $-1, %al
1255; AVX2-NEXT:    sete %al
1256; AVX2-NEXT:    vzeroupper
1257; AVX2-NEXT:    retq
1258;
1259; AVX512-LABEL: bool_reduction_v8i32:
1260; AVX512:       # %bb.0:
1261; AVX512-NEXT:    vpcmpleud %ymm1, %ymm0, %k0
1262; AVX512-NEXT:    kmovd %k0, %eax
1263; AVX512-NEXT:    cmpb $-1, %al
1264; AVX512-NEXT:    sete %al
1265; AVX512-NEXT:    vzeroupper
1266; AVX512-NEXT:    retq
  ; Unsigned less-or-equal per lane.
1267  %a = icmp ule <8 x i32> %x, %y
  ; Fold lanes 4-7, then 2-3, then 1 onto the low lanes, ANDing at each step.
1268  %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1269  %b = and <8 x i1> %s1, %a
1270  %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1271  %c = and <8 x i1> %s2, %b
1272  %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1273  %d = and <8 x i1> %s3, %c
1274  %e = extractelement <8 x i1> %d, i32 0
1275  ret i1 %e
1276}
1277
; All-of reduction on a <16 x i1> equality compare from 256-bit inputs: returns
; true only if %x == %y in all sixteen lanes. The expected 128-bit lowerings
; pack the two word-compare halves into one 16-byte mask, and the
; mask-register checks use kortestw + setb.
1278define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
1279; SSE-LABEL: bool_reduction_v16i16:
1280; SSE:       # %bb.0:
1281; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
1282; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
1283; SSE-NEXT:    packsswb %xmm1, %xmm0
1284; SSE-NEXT:    pmovmskb %xmm0, %eax
1285; SSE-NEXT:    cmpw $-1, %ax
1286; SSE-NEXT:    sete %al
1287; SSE-NEXT:    retq
1288;
1289; AVX1-LABEL: bool_reduction_v16i16:
1290; AVX1:       # %bb.0:
1291; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1292; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1293; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
1294; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
1295; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
1296; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1297; AVX1-NEXT:    cmpw $-1, %ax
1298; AVX1-NEXT:    sete %al
1299; AVX1-NEXT:    vzeroupper
1300; AVX1-NEXT:    retq
1301;
1302; AVX2-LABEL: bool_reduction_v16i16:
1303; AVX2:       # %bb.0:
1304; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
1305; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1306; AVX2-NEXT:    cmpl $-1, %eax
1307; AVX2-NEXT:    sete %al
1308; AVX2-NEXT:    vzeroupper
1309; AVX2-NEXT:    retq
1310;
1311; AVX512-LABEL: bool_reduction_v16i16:
1312; AVX512:       # %bb.0:
1313; AVX512-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
1314; AVX512-NEXT:    kortestw %k0, %k0
1315; AVX512-NEXT:    setb %al
1316; AVX512-NEXT:    vzeroupper
1317; AVX512-NEXT:    retq
  ; Equality per lane.
1318  %a = icmp eq <16 x i16> %x, %y
  ; Fold lanes 8-15, then 4-7, 2-3 and 1 onto the low lanes, ANDing at each step.
1319  %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1320  %b = and <16 x i1> %s1, %a
1321  %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1322  %c = and <16 x i1> %s2, %b
1323  %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1324  %d = and <16 x i1> %s3, %c
1325  %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1326  %e = and <16 x i1> %s4, %d
1327  %f = extractelement <16 x i1> %e, i32 0
1328  ret i1 %f
1329}
1330
1331define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
1332; SSE-LABEL: bool_reduction_v32i8:
1333; SSE:       # %bb.0:
1334; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
1335; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
1336; SSE-NEXT:    pand %xmm1, %xmm0
1337; SSE-NEXT:    pmovmskb %xmm0, %eax
1338; SSE-NEXT:    cmpw $-1, %ax
1339; SSE-NEXT:    sete %al
1340; SSE-NEXT:    retq
1341;
1342; AVX1-LABEL: bool_reduction_v32i8:
1343; AVX1:       # %bb.0:
1344; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1345; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1346; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
1347; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
1348; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
1349; AVX1-NEXT:    vpmovmskb %xmm0, %eax
1350; AVX1-NEXT:    cmpw $-1, %ax
1351; AVX1-NEXT:    sete %al
1352; AVX1-NEXT:    vzeroupper
1353; AVX1-NEXT:    retq
1354;
1355; AVX2-LABEL: bool_reduction_v32i8:
1356; AVX2:       # %bb.0:
1357; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
1358; AVX2-NEXT:    vpmovmskb %ymm0, %eax
1359; AVX2-NEXT:    cmpl $-1, %eax
1360; AVX2-NEXT:    sete %al
1361; AVX2-NEXT:    vzeroupper
1362; AVX2-NEXT:    retq
1363;
1364; AVX512-LABEL: bool_reduction_v32i8:
1365; AVX512:       # %bb.0:
1366; AVX512-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
1367; AVX512-NEXT:    kortestd %k0, %k0
1368; AVX512-NEXT:    setb %al
1369; AVX512-NEXT:    vzeroupper
1370; AVX512-NEXT:    retq
1371  %a = icmp eq <32 x i8> %x, %y
1372  %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1373  %b = and <32 x i1> %s1, %a
1374  %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1375  %c = and <32 x i1> %s2, %b
1376  %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1377  %d = and <32 x i1> %s3, %c
1378  %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1379  %e = and <32 x i1> %s4, %d
1380  %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1381  %f = and <32 x i1> %s5, %e
1382  %g = extractelement <32 x i1> %f, i32 0
1383  ret i1 %g
1384}
1385