; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512

;
; vXf32
;

define float @test_v1f32(<1 x float> %a0) {
; ALL-LABEL: test_v1f32:
; ALL:       # %bb.0:
; ALL-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a0)
  ret float %1
}

define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
  ret float %1
}

define float @test_v3f32(<3 x float> %a0) {
; SSE2-LABEL: test_v3f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    minss %xmm2, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    minss %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v3f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    minss %xmm2, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE41-NEXT:    minss %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v3f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v3f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT:    vminss %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0)
  ret float %1
}

define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0)
  ret float %1
}

define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
  ret float %1
}

define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm3, %xmm1
; SSE2-NEXT:    minps %xmm2, %xmm0
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm3, %xmm1
; SSE41-NEXT:    minps %xmm2, %xmm0
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0)
  ret float %1
}

;
; vXf64
;

define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
  ret double %1
}

define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0)
  ret double %1
}

define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
  ret double %1
}

define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm6, %xmm2
; SSE-NEXT:    minpd %xmm4, %xmm0
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm7, %xmm3
; SSE-NEXT:    minpd %xmm5, %xmm1
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0)
  ret double %1
}

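;
; vXf16
;
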
define half @test_v2f16(<2 x half> %a0) nounwind {
; SSE-LABEL: test_v2f16:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    pushq %r14
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    subq $16, %rsp
; SSE-NEXT:    movl %esi, %ebx
; SSE-NEXT:    movl %edi, %r14d
; SSE-NEXT:    movzwl %bx, %ebp
; SSE-NEXT:    movl %ebp, %edi
; SSE-NEXT:    callq __gnu_h2f_ieee@PLT
; SSE-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE-NEXT:    movzwl %r14w, %edi
; SSE-NEXT:    callq __gnu_h2f_ieee@PLT
; SSE-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; SSE-NEXT:    movw %bp, {{[0-9]+}}(%rsp)
; SSE-NEXT:    cmovbl %r14d, %ebx
; SSE-NEXT:    movw %bx, (%rsp)
; SSE-NEXT:    movl (%rsp), %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    addq $16, %rsp
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %r14
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f16:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    pushq %r14
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    subq $16, %rsp
; AVX-NEXT:    movl %esi, %ebx
; AVX-NEXT:    movl %edi, %r14d
; AVX-NEXT:    movzwl %bx, %ebp
; AVX-NEXT:    movl %ebp, %edi
; AVX-NEXT:    callq __gnu_h2f_ieee@PLT
; AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX-NEXT:    movzwl %r14w, %edi
; AVX-NEXT:    callq __gnu_h2f_ieee@PLT
; AVX-NEXT:    vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; AVX-NEXT:    movw %bp, {{[0-9]+}}(%rsp)
; AVX-NEXT:    cmovbl %r14d, %ebx
; AVX-NEXT:    movw %bx, (%rsp)
; AVX-NEXT:    movl (%rsp), %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    addq $16, %rsp
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    popq %r14
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzwl %si, %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT:    movzwl %di, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm1
; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT:    vucomiss %xmm0, %xmm1
; AVX512-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    cmovbl %edi, %esi
; AVX512-NEXT:    movw %si, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
  %1 = call nnan half @llvm.vector.reduce.fmin.v2f16(<2 x half> %a0)
  ret half %1
}

declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>)
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v3f32(<3 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)

declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)