1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2     | FileCheck %s --check-prefix=X86-SSE2
3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2   | FileCheck %s --check-prefix=X86-SSE42
4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx      | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2     | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2   | FileCheck %s --check-prefix=X64-SSE2
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx    | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2   | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512
11
12;
13; 128-bit Vectors
14;
15
; Signed-max reduction of <2 x i64>: the IR shuffles the high element down
; and combines it with icmp sgt + select (an smax pattern).  SSE2 has no
; 64-bit signed compare, so the compare is synthesized from pcmpgtd/pcmpeqd
; after flipping the sign bits with pxor; SSE4.2/AVX use pcmpgtq + blendvpd;
; AVX512 selects vpmaxsq directly.  On i686 the i64 result is returned in
; edx:eax (hence the separate movd/pextrd into %eax and %edx).
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movd %xmm3, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp sgt <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}
117
; Signed-max reduction of <4 x i32> via two shuffle + icmp sgt + select
; (smax) steps: first fold the high 64 bits onto the low, then fold lane 1
; onto lane 0.  SSE2 expands each smax to pcmpgtd/pand/pandn/por; SSE4.2
; and AVX use the dedicated pmaxsd instruction.
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp sgt <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp sgt <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}
197
; Signed-max reduction of <8 x i16>.  SSE2 lowers each smax step with
; pmaxsw directly.  SSE4.2/AVX instead flip the sign bits (pxor with a
; constant-pool mask), locate the unsigned minimum of the whole vector with
; phminposuw, and undo the flip with xorl $32767 — turning three max steps
; into a single horizontal instruction.
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    retq
  %1  = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <8 x i16> %a0, %1
  %3  = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4  = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <8 x i16> %3, %4
  %6  = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7  = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <8 x i16> %6, %7
  %9  = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}
272
; Signed-max reduction of <16 x i8>.  SSE2 has no pmaxsb, so each smax step
; expands to pcmpgtb/pand/pandn/por.  SSE4.2/AVX flip the sign bits, do one
; psrlw/pminub step to fold odd bytes onto even bytes (phminposuw only
; reduces i16 lanes), reduce with phminposuw, then undo the flip with
; xorb $127.
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $127, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorb $127, %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $127, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorb $127, %al
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    retq
  %1  = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <16 x i8> %a0, %1
  %3  = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4  = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <16 x i8> %3, %4
  %6  = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <16 x i8> %6, %7
  %9  = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}
398
399;
400; 256-bit Vectors
401;
402
; Signed-max reduction of <4 x i64> (256-bit): first smax the two 128-bit
; halves together, then reduce within the low half.  SSE2 synthesizes each
; 64-bit signed compare from pcmpgtd/pcmpeqd with sign bits flipped;
; SSE4.2/AVX use pcmpgtq + blendvpd; AVX512 uses vpmaxsq.  The AVX variants
; extract the high half with vextract{f,i}128 and end with vzeroupper
; because ymm state was touched.
define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm6, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movq %xmm2, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp sgt <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp sgt <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}
571
; Signed-max reduction of <8 x i32>: smax the two 128-bit halves, then two
; in-register shuffle + smax steps within the low half.  SSE2 expands each
; smax to pcmpgtd/pand/pandn/por; SSE4.2/AVX/AVX512 use pmaxsd.  AVX
; variants extract the high half with vextract{f,i}128 and emit vzeroupper
; before returning.
define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <8 x i32> %a0, %1
  %3  = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4  = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <8 x i32> %3, %4
  %6  = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7  = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <8 x i32> %6, %7
  %9  = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}
708
; smax reduction of <16 x i16> to i16: the IR after the CHECK lines folds the
; vector in halves (8 -> 4 -> 2 -> 1 lanes) with shufflevector + icmp sgt +
; select, then extracts lane 0. The SSE4.2/AVX lowerings below implement smax
; via pxor with a constant-pool value, phminposuw, and a final xorl $32767
; (constant contents not visible here — matched by regex).
define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Log2 reduction tree: each shuffle moves the upper half down, each
; icmp/select pair keeps the lane-wise signed maximum.
  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <16 x i16> %a0, %1
  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <16 x i16> %3, %4
  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <16 x i16> %6, %7
  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <16 x i16> %9, %10
  %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
  ; Lane 0 now holds smax of all 16 input elements.
  %13 = extractelement <16 x i16> %12, i32 0
  ret i16 %13
}
832
; smax reduction of <32 x i8> to i8 (16 -> 8 -> 4 -> 2 -> 1 lanes via
; shufflevector + icmp sgt + select). SSE2 has no pmaxsb, so that lowering
; synthesizes max with pcmpgtb/pand/pandn/por at every step; SSE4.2+ uses
; pmaxsb then the pxor + psrlw/pminub + phminposuw + xorb $127 sequence.
define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxsb %xmm1, %xmm0
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $127, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    xorb $127, %al
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    xorb $127, %al
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxsb %xmm1, %xmm0
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $127, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    xorb $127, %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    xorb $127, %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    xorb $127, %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Five-step log2 reduction tree: each shuffle moves the upper half down,
; each icmp/select pair keeps the lane-wise signed maximum.
  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <32 x i8> %a0, %1
  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <32 x i8> %3, %4
  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <32 x i8> %6, %7
  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp sgt <32 x i8> %12, %13
  %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
  ; Lane 0 now holds smax of all 32 input elements.
  %16 = extractelement <32 x i8> %15, i32 0
  ret i8 %16
}
1021
1022;
1023; 512-bit Vectors
1024;
1025
; smax reduction of <8 x i64> to i64 (4 -> 2 -> 1 lanes via shufflevector +
; icmp sgt + select). SSE2 has no 64-bit compare, so those lowerings build a
; signed i64 compare from pcmpgtd/pcmpeqd on sign-bit-flipped halves (pxor
; with the 0x80000000 constant); SSE4.2/AVX use pcmpgtq + blendvpd; AVX512
; uses vpmaxsq directly. On i686, the i64 result is returned in eax:edx.
define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm5
; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm6
; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
; X86-SSE2-NEXT:    movdqa %xmm6, %xmm7
; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm6
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm6, %xmm5
; X86-SSE2-NEXT:    pand %xmm5, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm5
; X86-SSE2-NEXT:    por %xmm0, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm6
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm5, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm5
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm0, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; X86-SSE42-NEXT:    movdqa %xmm4, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; X86-SSE42-NEXT:    movapd %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; X86-SSE42-NEXT:    movd %xmm1, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm1, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
; X86-AVX1-NEXT:    vblendvpd %xmm5, %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm1
; X86-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm5
; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm6
; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
; X64-SSE2-NEXT:    movdqa %xmm6, %xmm7
; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm8, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm6, %xmm5
; X64-SSE2-NEXT:    pand %xmm5, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm5
; X64-SSE2-NEXT:    por %xmm0, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm6
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm7, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm5, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm5
; X64-SSE2-NEXT:    pandn %xmm2, %xmm1
; X64-SSE2-NEXT:    por %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; X64-SSE42-NEXT:    movdqa %xmm4, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; X64-SSE42-NEXT:    movapd %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; X64-SSE42-NEXT:    movq %xmm1, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
; X64-AVX1-NEXT:    vblendvpd %xmm5, %xmm4, %xmm3, %xmm3
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm1
; X64-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Three-step log2 reduction tree: each shuffle moves the upper half down,
; each icmp/select pair keeps the lane-wise signed maximum.
  %1  = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <8 x i64> %a0, %1
  %3  = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
  %4  = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <8 x i64> %3, %4
  %6  = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
  %7  = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <8 x i64> %6, %7
  %9  = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
  ; Lane 0 now holds smax of all 8 input elements.
  %10 = extractelement <8 x i64> %9, i32 0
  ret i64 %10
}
1287
1288define i32 @test_reduce_v16i32(<16 x i32> %a0) {
1289; X86-SSE2-LABEL: test_reduce_v16i32:
1290; X86-SSE2:       ## %bb.0:
1291; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
1292; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
1293; X86-SSE2-NEXT:    pand %xmm4, %xmm1
1294; X86-SSE2-NEXT:    pandn %xmm3, %xmm4
1295; X86-SSE2-NEXT:    por %xmm1, %xmm4
1296; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1297; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
1298; X86-SSE2-NEXT:    pand %xmm1, %xmm0
1299; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
1300; X86-SSE2-NEXT:    por %xmm0, %xmm1
1301; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
1302; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
1303; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1304; X86-SSE2-NEXT:    pandn %xmm4, %xmm0
1305; X86-SSE2-NEXT:    por %xmm1, %xmm0
1306; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1307; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1308; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
1309; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1310; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
1311; X86-SSE2-NEXT:    por %xmm0, %xmm2
1312; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1313; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
1314; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
1315; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1316; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
1317; X86-SSE2-NEXT:    por %xmm2, %xmm1
1318; X86-SSE2-NEXT:    movd %xmm1, %eax
1319; X86-SSE2-NEXT:    retl
1320;
1321; X86-SSE42-LABEL: test_reduce_v16i32:
1322; X86-SSE42:       ## %bb.0:
1323; X86-SSE42-NEXT:    pmaxsd %xmm3, %xmm1
1324; X86-SSE42-NEXT:    pmaxsd %xmm2, %xmm1
1325; X86-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
1326; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1327; X86-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
1328; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1329; X86-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
1330; X86-SSE42-NEXT:    movd %xmm1, %eax
1331; X86-SSE42-NEXT:    retl
1332;
1333; X86-AVX1-LABEL: test_reduce_v16i32:
1334; X86-AVX1:       ## %bb.0:
1335; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1336; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1337; X86-AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
1338; X86-AVX1-NEXT:    vpmaxsd %xmm2, %xmm1, %xmm1
1339; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1340; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1341; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1342; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1343; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1344; X86-AVX1-NEXT:    vmovd %xmm0, %eax
1345; X86-AVX1-NEXT:    vzeroupper
1346; X86-AVX1-NEXT:    retl
1347;
1348; X86-AVX2-LABEL: test_reduce_v16i32:
1349; X86-AVX2:       ## %bb.0:
1350; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
1351; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1352; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1353; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1354; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1355; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1356; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1357; X86-AVX2-NEXT:    vmovd %xmm0, %eax
1358; X86-AVX2-NEXT:    vzeroupper
1359; X86-AVX2-NEXT:    retl
1360;
1361; X64-SSE2-LABEL: test_reduce_v16i32:
1362; X64-SSE2:       ## %bb.0:
1363; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
1364; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
1365; X64-SSE2-NEXT:    pand %xmm4, %xmm1
1366; X64-SSE2-NEXT:    pandn %xmm3, %xmm4
1367; X64-SSE2-NEXT:    por %xmm1, %xmm4
1368; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1369; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
1370; X64-SSE2-NEXT:    pand %xmm1, %xmm0
1371; X64-SSE2-NEXT:    pandn %xmm2, %xmm1
1372; X64-SSE2-NEXT:    por %xmm0, %xmm1
1373; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
1374; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
1375; X64-SSE2-NEXT:    pand %xmm0, %xmm1
1376; X64-SSE2-NEXT:    pandn %xmm4, %xmm0
1377; X64-SSE2-NEXT:    por %xmm1, %xmm0
1378; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1379; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
1380; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
1381; X64-SSE2-NEXT:    pand %xmm2, %xmm0
1382; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
1383; X64-SSE2-NEXT:    por %xmm0, %xmm2
1384; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1385; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
1386; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
1387; X64-SSE2-NEXT:    pand %xmm1, %xmm2
1388; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
1389; X64-SSE2-NEXT:    por %xmm2, %xmm1
1390; X64-SSE2-NEXT:    movd %xmm1, %eax
1391; X64-SSE2-NEXT:    retq
1392;
1393; X64-SSE42-LABEL: test_reduce_v16i32:
1394; X64-SSE42:       ## %bb.0:
1395; X64-SSE42-NEXT:    pmaxsd %xmm3, %xmm1
1396; X64-SSE42-NEXT:    pmaxsd %xmm2, %xmm1
1397; X64-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
1398; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1399; X64-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
1400; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1401; X64-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
1402; X64-SSE42-NEXT:    movd %xmm1, %eax
1403; X64-SSE42-NEXT:    retq
1404;
1405; X64-AVX1-LABEL: test_reduce_v16i32:
1406; X64-AVX1:       ## %bb.0:
1407; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1408; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1409; X64-AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
1410; X64-AVX1-NEXT:    vpmaxsd %xmm2, %xmm1, %xmm1
1411; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1412; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1413; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1414; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1415; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1416; X64-AVX1-NEXT:    vmovd %xmm0, %eax
1417; X64-AVX1-NEXT:    vzeroupper
1418; X64-AVX1-NEXT:    retq
1419;
1420; X64-AVX2-LABEL: test_reduce_v16i32:
1421; X64-AVX2:       ## %bb.0:
1422; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
1423; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1424; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1425; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1426; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1427; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1428; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1429; X64-AVX2-NEXT:    vmovd %xmm0, %eax
1430; X64-AVX2-NEXT:    vzeroupper
1431; X64-AVX2-NEXT:    retq
1432;
1433; X64-AVX512-LABEL: test_reduce_v16i32:
1434; X64-AVX512:       ## %bb.0:
1435; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1436; X64-AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
1437; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1438; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1439; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1440; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1441; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1442; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
1443; X64-AVX512-NEXT:    vmovd %xmm0, %eax
1444; X64-AVX512-NEXT:    vzeroupper
1445; X64-AVX512-NEXT:    retq
1446  %1  = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1447  %2  = icmp sgt <16 x i32> %a0, %1
1448  %3  = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
1449  %4  = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1450  %5  = icmp sgt <16 x i32> %3, %4
1451  %6  = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
1452  %7  = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1453  %8  = icmp sgt <16 x i32> %6, %7
1454  %9  = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
1455  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1456  %11 = icmp sgt <16 x i32> %9, %10
1457  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
1458  %13 = extractelement <16 x i32> %12, i32 0
1459  ret i32 %13
1460}
1461
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pmaxsw %xmm3, %xmm1
; X86-SSE2-NEXT:    pmaxsw %xmm2, %xmm1
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxsw %xmm3, %xmm1
; X86-SSE42-NEXT:    pmaxsw %xmm2, %xmm1
; X86-SSE42-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpmaxsw %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pmaxsw %xmm3, %xmm1
; X64-SSE2-NEXT:    pmaxsw %xmm2, %xmm1
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxsw %xmm3, %xmm1
; X64-SSE42-NEXT:    pmaxsw %xmm2, %xmm1
; X64-SSE42-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpmaxsw %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  ; Horizontal signed-max reduction of all 32 i16 lanes: each
  ; shufflevector halves the number of live lanes (16, 8, 4, 2, 1) and the
  ; icmp sgt / select pair keeps the lane-wise maximum; lane 0 of the final
  ; select holds the overall maximum.  The CHECK lines above are
  ; autogenerated (see the NOTE in the file header) -- regenerate with
  ; utils/update_llc_test_checks.py rather than editing them by hand.
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp sgt <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}
1606
define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pcmpgtb %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm4
; X86-SSE2-NEXT:    por %xmm1, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
; X86-SSE2-NEXT:    por %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    pcmpgtb %xmm4, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm4, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxsb %xmm3, %xmm1
; X86-SSE42-NEXT:    pmaxsb %xmm2, %xmm1
; X86-SSE42-NEXT:    pmaxsb %xmm0, %xmm1
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $127, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v64i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpmaxsb %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    xorb $127, %al
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v64i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    xorb $127, %al
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pcmpgtb %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm4
; X64-SSE2-NEXT:    por %xmm1, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm1
; X64-SSE2-NEXT:    por %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    pcmpgtb %xmm4, %xmm0
; X64-SSE2-NEXT:    pand %xmm0, %xmm1
; X64-SSE2-NEXT:    pandn %xmm4, %xmm0
; X64-SSE2-NEXT:    por %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxsb %xmm3, %xmm1
; X64-SSE42-NEXT:    pmaxsb %xmm2, %xmm1
; X64-SSE42-NEXT:    pmaxsb %xmm0, %xmm1
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $127, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpmaxsb %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    xorb $127, %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    xorb $127, %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    xorb $127, %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  ; Horizontal signed-max reduction of all 64 i8 lanes: each shufflevector
  ; halves the number of live lanes (32, 16, 8, 4, 2, 1) and the
  ; icmp sgt / select pair keeps the lane-wise maximum; lane 0 of the final
  ; select holds the overall maximum.  CHECK lines are autogenerated --
  ; regenerate with utils/update_llc_test_checks.py, do not hand-edit.
  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <64 x i8> %a0, %1
  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <64 x i8> %3, %4
  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <64 x i8> %6, %7
  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp sgt <64 x i8> %12, %13
  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %17 = icmp sgt <64 x i8> %15, %16
  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
  %19 = extractelement <64 x i8> %18, i32 0
  ret i8 %19
}
1832
1833;
1834; Partial Vector Reductions
1835;
1836
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  ; Partial reduction: although the operand is <16 x i16>, the shuffle
  ; masks only ever pull from indices 4-7, 2-3, then 1, so only the low
  ; 8 lanes participate in the signed-max reduction; lanes 8-15 are never
  ; read.  CHECK lines are autogenerated -- regenerate with
  ; utils/update_llc_test_checks.py, do not hand-edit.
  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <16 x i16> %a0, %1
  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <16 x i16> %3, %4
  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <16 x i16> %6, %7
  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = extractelement <16 x i16> %9, i32 0
  ret i16 %10
}
1913
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; Signed-max reduction over only the low 8 lanes of a <32 x i16> input:
; a shuffle/compare/select ladder halves the active lane count each step
; (4 -> 2 -> 1) and the result ends up in lane 0. Checks that codegen
; narrows the wide vector to a single 128-bit reduction (pmaxsw ladder on
; SSE2; pxor+phminposuw min-of-inverted trick on SSE4.2/AVX).
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  ; Step 1: smax of lanes 0-3 against lanes 4-7.
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  ; Step 2: smax of lanes 0-1 against lanes 2-3.
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  ; Step 3: smax of lane 0 against lane 1.
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  ; Lane 0 now holds the maximum of the original low 8 lanes.
  %10 = extractelement <32 x i16> %9, i32 0
  ret i16 %10
}
1990
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; Signed-max reduction over only the low 16 lanes of a <32 x i8> input:
; a shuffle/compare/select ladder halves the active lane count each step
; (8 -> 4 -> 2 -> 1) and the result ends up in lane 0. Checks that the
; upper half of the vector is ignored: SSE2 uses a pcmpgtb/pand/pandn/por
; select ladder; SSE4.2/AVX invert with pxor and use pminub+phminposuw.
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $127, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorb $127, %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $127, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorb $127, %al
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  ; Step 1: smax of lanes 0-7 against lanes 8-15.
  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <32 x i8> %a0, %1
  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  ; Step 2: smax of lanes 0-3 against lanes 4-7.
  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <32 x i8> %3, %4
  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  ; Step 3: smax of lanes 0-1 against lanes 2-3.
  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <32 x i8> %6, %7
  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  ; Step 4: smax of lane 0 against lane 1.
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  ; Lane 0 now holds the maximum of the original low 16 lanes.
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}
2118
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; Signed-max reduction over only the low 16 lanes of a <64 x i8> input:
; same shuffle/compare/select ladder as the v32i8 case (8 -> 4 -> 2 -> 1
; active lanes), checking that the three unused 128-bit chunks of the
; 512-bit input are dropped entirely and codegen matches the v16i8-wide
; reduction (pcmpgtb select ladder on SSE2; pminub+phminposuw elsewhere).
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $127, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorb $127, %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $127, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorb $127, %al
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  ; Step 1: smax of lanes 0-7 against lanes 8-15.
  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp sgt <64 x i8> %a0, %1
  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  ; Step 2: smax of lanes 0-3 against lanes 4-7.
  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp sgt <64 x i8> %3, %4
  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  ; Step 3: smax of lanes 0-1 against lanes 2-3.
  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp sgt <64 x i8> %6, %7
  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  ; Step 4: smax of lane 0 against lane 1.
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  ; Lane 0 now holds the maximum of the original low 16 lanes.
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}
2246