; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,VI %s

; Fast fadd reduction over <4 x half>: SLP-vectorized to
; @llvm.vector.reduce.fadd on gfx900; stays a scalar chain on fiji (VI).
define half @reduction_half4(<4 x half> %a) {
; GFX9-LABEL: @reduction_half4(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[A:%.*]])
; GFX9-NEXT:    ret half [[TMP0]]
;
; VI-LABEL: @reduction_half4(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3
; VI-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; VI-NEXT:    [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
; VI-NEXT:    [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
; VI-NEXT:    ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %a, i64 0
  %elt1 = extractelement <4 x half> %a, i64 1
  %elt2 = extractelement <4 x half> %a, i64 2
  %elt3 = extractelement <4 x half> %a, i64 3

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2

  ret half %add3
}

; 8-wide fast fadd reduction: becomes @llvm.vector.reduce.fadd.v8f16 on
; gfx900; stays a scalar chain on fiji (VI).
define half @reduction_half8(<8 x half> %vec8) {
; GFX9-LABEL: @reduction_half8(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[VEC8:%.*]])
; GFX9-NEXT:    ret half [[TMP0]]
;
; VI-LABEL: @reduction_half8(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x half> [[VEC8:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
; VI-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; VI-NEXT:    [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
; VI-NEXT:    [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
; VI-NEXT:    [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]]
; VI-NEXT:    [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]]
; VI-NEXT:    [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]]
; VI-NEXT:    [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]]
; VI-NEXT:    ret half [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6

  ret half %add7
}

; 16-wide fast fadd reduction: becomes @llvm.vector.reduce.fadd.v16f16 on
; gfx900; stays a scalar chain on fiji (VI).
define half @reduction_half16(<16 x half> %vec16) {
; GFX9-LABEL: @reduction_half16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[VEC16:%.*]])
; GFX9-NEXT:    ret half [[TMP0]]
;
; VI-LABEL: @reduction_half16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
; VI-NEXT:    [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8
; VI-NEXT:    [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
; VI-NEXT:    [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
; VI-NEXT:    [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
; VI-NEXT:    [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
; VI-NEXT:    [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
; VI-NEXT:    [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
; VI-NEXT:    [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
; VI-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; VI-NEXT:    [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
; VI-NEXT:    [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
; VI-NEXT:    [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]]
; VI-NEXT:    [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]]
; VI-NEXT:    [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]]
; VI-NEXT:    [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]]
; VI-NEXT:    [[ADD8:%.*]] = fadd fast half [[ELT8]], [[ADD7]]
; VI-NEXT:    [[ADD9:%.*]] = fadd fast half [[ELT9]], [[ADD8]]
; VI-NEXT:    [[ADD10:%.*]] = fadd fast half [[ELT10]], [[ADD9]]
; VI-NEXT:    [[ADD11:%.*]] = fadd fast half [[ELT11]], [[ADD10]]
; VI-NEXT:    [[ADD12:%.*]] = fadd fast half [[ELT12]], [[ADD11]]
; VI-NEXT:    [[ADD13:%.*]] = fadd fast half [[ELT13]], [[ADD12]]
; VI-NEXT:    [[ADD14:%.*]] = fadd fast half [[ELT14]], [[ADD13]]
; VI-NEXT:    [[ADD15:%.*]] = fadd fast half [[ELT15]], [[ADD14]]
; VI-NEXT:    ret half [[ADD15]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  %add8 = fadd fast half %elt8, %add7
  %add9 = fadd fast half %elt9, %add8
  %add10 = fadd fast half %elt10, %add9
  %add11 = fadd fast half %elt11, %add10
  %add12 = fadd fast half %elt12, %add11
  %add13 = fadd fast half %elt13, %add12
  %add14 = fadd fast half %elt14, %add13
  %add15 = fadd fast half %elt15, %add14

  ret half %add15
}

; FIXME: support vectorization;
define half @reduction_sub_half4(<4 x half> %a) {
; GCN-LABEL: @reduction_sub_half4(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3
; GCN-NEXT:    [[ADD1:%.*]] = fsub fast half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[ADD2:%.*]] = fsub fast half [[ELT2]], [[ADD1]]
; GCN-NEXT:    [[ADD3:%.*]] = fsub fast half [[ELT3]], [[ADD2]]
; GCN-NEXT:    ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %a, i64 0
  %elt1 = extractelement <4 x half> %a, i64 1
  %elt2 = extractelement <4 x half> %a, i64 2
  %elt3 = extractelement <4 x half> %a, i64 3

  %add1 = fsub fast half %elt1, %elt0
  %add2 = fsub fast half %elt2, %add1
  %add3 = fsub fast half %elt3, %add2

  ret half %add3
}

; Integer add reduction over <4 x i16>: becomes @llvm.vector.reduce.add on
; gfx900; stays a scalar chain on fiji (VI).
define i16 @reduction_v4i16(<4 x i16> %a) {
; GFX9-LABEL: @reduction_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[A]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[A]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[A]], i64 3
; VI-NEXT:    [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]]
; VI-NEXT:    [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]]
; VI-NEXT:    ret i16 [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x i16> %a, i64 0
  %elt1 = extractelement <4 x i16> %a, i64 1
  %elt2 = extractelement <4 x i16> %a, i64 2
  %elt3 = extractelement <4 x i16> %a, i64 3

  %add1 = add i16 %elt1, %elt0
  %add2 = add i16 %elt2, %add1
  %add3 = add i16 %elt3, %add2

  ret i16 %add3
}

; 8-wide integer add reduction: becomes @llvm.vector.reduce.add.v8i16 on
; gfx900; stays a scalar chain on fiji (VI).
define i16 @reduction_v8i16(<8 x i16> %vec8) {
; GFX9-LABEL: @reduction_v8i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_v8i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7
; VI-NEXT:    [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]]
; VI-NEXT:    [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]]
; VI-NEXT:    [[ADD4:%.*]] = add i16 [[ELT4]], [[ADD3]]
; VI-NEXT:    [[ADD5:%.*]] = add i16 [[ELT5]], [[ADD4]]
; VI-NEXT:    [[ADD6:%.*]] = add i16 [[ELT6]], [[ADD5]]
; VI-NEXT:    [[ADD7:%.*]] = add i16 [[ELT7]], [[ADD6]]
; VI-NEXT:    ret i16 [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x i16> %vec8, i64 0
  %elt1 = extractelement <8 x i16> %vec8, i64 1
  %elt2 = extractelement <8 x i16> %vec8, i64 2
  %elt3 = extractelement <8 x i16> %vec8, i64 3
  %elt4 = extractelement <8 x i16> %vec8, i64 4
  %elt5 = extractelement <8 x i16> %vec8, i64 5
  %elt6 = extractelement <8 x i16> %vec8, i64 6
  %elt7 = extractelement <8 x i16> %vec8, i64 7

  %add1 = add i16 %elt1, %elt0
  %add2 = add i16 %elt2, %add1
  %add3 = add i16 %elt3, %add2
  %add4 = add i16 %elt4, %add3
  %add5 = add i16 %elt5, %add4
  %add6 = add i16 %elt6, %add5
  %add7 = add i16 %elt7, %add6

  ret i16 %add7
}

; icmp ult + select min-reduction: recognized as @llvm.vector.reduce.umin on
; gfx900; stays scalar on fiji (VI).
define i16 @reduction_umin_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_umin_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_umin_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    ret i16 [[MIN3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp ult i16 %elt1, %elt0
  %min1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp ult i16 %elt2, %min1
  %min2 = select i1 %cmp2, i16 %elt2, i16 %min1
  %cmp3 = icmp ult i16 %elt3, %min2
  %min3 = select i1 %cmp3, i16 %elt3, i16 %min2

  ret i16 %min3
}

; 8-wide umin reduction spelled as icmp+select: becomes
; @llvm.vector.reduce.umin.v8i16 on gfx900; stays scalar on fiji (VI).
define i16 @reduction_icmp_v8i16(<8 x i16> %vec8) {
; GFX9-LABEL: @reduction_icmp_v8i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[VEC8:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_icmp_v8i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7
; VI-NEXT:    [[CMP0:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT4]], [[MIN3]]
; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
; VI-NEXT:    [[CMP4:%.*]] = icmp ult i16 [[ELT5]], [[MIN4]]
; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
; VI-NEXT:    [[CMP5:%.*]] = icmp ult i16 [[ELT6]], [[MIN5]]
; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
; VI-NEXT:    [[CMP6:%.*]] = icmp ult i16 [[ELT7]], [[MIN6]]
; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
; VI-NEXT:    ret i16 [[MIN7]]
;
entry:
  %elt0 = extractelement <8 x i16> %vec8, i64 0
  %elt1 = extractelement <8 x i16> %vec8, i64 1
  %elt2 = extractelement <8 x i16> %vec8, i64 2
  %elt3 = extractelement <8 x i16> %vec8, i64 3
  %elt4 = extractelement <8 x i16> %vec8, i64 4
  %elt5 = extractelement <8 x i16> %vec8, i64 5
  %elt6 = extractelement <8 x i16> %vec8, i64 6
  %elt7 = extractelement <8 x i16> %vec8, i64 7

  %cmp0 = icmp ult i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp ult i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp ult i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp ult i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp ult i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp ult i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp ult i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  ret i16 %min7
}

; 16-wide signed-min reduction (icmp slt + select): becomes
; @llvm.vector.reduce.smin.v16i16 on gfx900; stays scalar on fiji (VI).
define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
; GFX9-LABEL: @reduction_smin_v16i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> [[VEC16:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_smin_v16i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <16 x i16> [[VEC16:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <16 x i16> [[VEC16]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <16 x i16> [[VEC16]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <16 x i16> [[VEC16]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <16 x i16> [[VEC16]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <16 x i16> [[VEC16]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <16 x i16> [[VEC16]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <16 x i16> [[VEC16]], i64 7
; VI-NEXT:    [[ELT8:%.*]] = extractelement <16 x i16> [[VEC16]], i64 8
; VI-NEXT:    [[ELT9:%.*]] = extractelement <16 x i16> [[VEC16]], i64 9
; VI-NEXT:    [[ELT10:%.*]] = extractelement <16 x i16> [[VEC16]], i64 10
; VI-NEXT:    [[ELT11:%.*]] = extractelement <16 x i16> [[VEC16]], i64 11
; VI-NEXT:    [[ELT12:%.*]] = extractelement <16 x i16> [[VEC16]], i64 12
; VI-NEXT:    [[ELT13:%.*]] = extractelement <16 x i16> [[VEC16]], i64 13
; VI-NEXT:    [[ELT14:%.*]] = extractelement <16 x i16> [[VEC16]], i64 14
; VI-NEXT:    [[ELT15:%.*]] = extractelement <16 x i16> [[VEC16]], i64 15
; VI-NEXT:    [[CMP0:%.*]] = icmp slt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP1:%.*]] = icmp slt i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    [[CMP3:%.*]] = icmp slt i16 [[ELT4]], [[MIN3]]
; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
; VI-NEXT:    [[CMP4:%.*]] = icmp slt i16 [[ELT5]], [[MIN4]]
; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
; VI-NEXT:    [[CMP5:%.*]] = icmp slt i16 [[ELT6]], [[MIN5]]
; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
; VI-NEXT:    [[CMP6:%.*]] = icmp slt i16 [[ELT7]], [[MIN6]]
; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
; VI-NEXT:    [[CMP7:%.*]] = icmp slt i16 [[ELT8]], [[MIN7]]
; VI-NEXT:    [[MIN8:%.*]] = select i1 [[CMP7]], i16 [[ELT8]], i16 [[MIN7]]
; VI-NEXT:    [[CMP8:%.*]] = icmp slt i16 [[ELT9]], [[MIN8]]
; VI-NEXT:    [[MIN9:%.*]] = select i1 [[CMP8]], i16 [[ELT9]], i16 [[MIN8]]
; VI-NEXT:    [[CMP9:%.*]] = icmp slt i16 [[ELT10]], [[MIN9]]
; VI-NEXT:    [[MIN10:%.*]] = select i1 [[CMP9]], i16 [[ELT10]], i16 [[MIN9]]
; VI-NEXT:    [[CMP10:%.*]] = icmp slt i16 [[ELT11]], [[MIN10]]
; VI-NEXT:    [[MIN11:%.*]] = select i1 [[CMP10]], i16 [[ELT11]], i16 [[MIN10]]
; VI-NEXT:    [[CMP11:%.*]] = icmp slt i16 [[ELT12]], [[MIN11]]
; VI-NEXT:    [[MIN12:%.*]] = select i1 [[CMP11]], i16 [[ELT12]], i16 [[MIN11]]
; VI-NEXT:    [[CMP12:%.*]] = icmp slt i16 [[ELT13]], [[MIN12]]
; VI-NEXT:    [[MIN13:%.*]] = select i1 [[CMP12]], i16 [[ELT13]], i16 [[MIN12]]
; VI-NEXT:    [[CMP13:%.*]] = icmp slt i16 [[ELT14]], [[MIN13]]
; VI-NEXT:    [[MIN14:%.*]] = select i1 [[CMP13]], i16 [[ELT14]], i16 [[MIN13]]
; VI-NEXT:    [[CMP14:%.*]] = icmp slt i16 [[ELT15]], [[MIN14]]
; VI-NEXT:    [[MIN15:%.*]] = select i1 [[CMP14]], i16 [[ELT15]], i16 [[MIN14]]
; VI-NEXT:    ret i16 [[MIN15]]
;
entry:
  %elt0 = extractelement <16 x i16> %vec16, i64 0
  %elt1 = extractelement <16 x i16> %vec16, i64 1
  %elt2 = extractelement <16 x i16> %vec16, i64 2
  %elt3 = extractelement <16 x i16> %vec16, i64 3
  %elt4 = extractelement <16 x i16> %vec16, i64 4
  %elt5 = extractelement <16 x i16> %vec16, i64 5
  %elt6 = extractelement <16 x i16> %vec16, i64 6
  %elt7 = extractelement <16 x i16> %vec16, i64 7

  %elt8 = extractelement <16 x i16> %vec16, i64 8
  %elt9 = extractelement <16 x i16> %vec16, i64 9
  %elt10 = extractelement <16 x i16> %vec16, i64 10
  %elt11 = extractelement <16 x i16> %vec16, i64 11
  %elt12 = extractelement <16 x i16> %vec16, i64 12
  %elt13 = extractelement <16 x i16> %vec16, i64 13
  %elt14 = extractelement <16 x i16> %vec16, i64 14
  %elt15 = extractelement <16 x i16> %vec16, i64 15

  %cmp0 = icmp slt i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp slt i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp slt i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp slt i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp slt i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp slt i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp slt i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  %cmp7 = icmp slt i16 %elt8, %min7
  %min8 = select i1 %cmp7, i16 %elt8, i16 %min7
  %cmp8 = icmp slt i16 %elt9, %min8
  %min9 = select i1 %cmp8, i16 %elt9, i16 %min8

  %cmp9 = icmp slt i16 %elt10, %min9
  %min10 = select i1 %cmp9, i16 %elt10, i16 %min9
  %cmp10 = icmp slt i16 %elt11, %min10
  %min11 = select i1 %cmp10, i16 %elt11, i16 %min10

  %cmp11 = icmp slt i16 %elt12, %min11
  %min12 = select i1 %cmp11, i16 %elt12, i16 %min11
  %cmp12 = icmp slt i16 %elt13, %min12
  %min13 = select i1 %cmp12, i16 %elt13, i16 %min12

  %cmp13 = icmp slt i16 %elt14, %min13
  %min14 = select i1 %cmp13, i16 %elt14, i16 %min13
  %cmp14 = icmp slt i16 %elt15, %min14
  %min15 = select i1 %cmp14, i16 %elt15, i16 %min14


  ret i16 %min15
}

; icmp ugt + select max-reduction: becomes @llvm.vector.reduce.umax on
; gfx900; stays scalar on fiji (VI).
define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_umax_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_umax_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ugt i16 [[ELT2]], [[MAX1]]
; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ugt i16 [[ELT3]], [[MAX2]]
; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
; VI-NEXT:    ret i16 [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp ugt i16 %elt1, %elt0
  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp ugt i16 %elt2, %max1
  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
  %cmp3 = icmp ugt i16 %elt3, %max2
  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2

  ret i16 %max3
}

; icmp sgt + select max-reduction: becomes @llvm.vector.reduce.smax on
; gfx900; stays scalar on fiji (VI).
define i16 @reduction_smax_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_smax_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_smax_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp sgt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[ELT2]], [[MAX1]]
; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp sgt i16 [[ELT3]], [[MAX2]]
; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
; VI-NEXT:    ret i16 [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp sgt i16 %elt1, %elt0
  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp sgt i16 %elt2, %max1
  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
  %cmp3 = icmp sgt i16 %elt3, %max2
  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2

  ret i16 %max3
}

; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
define half @reduction_fmax_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmax_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]]
; GCN-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]]
; GCN-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]]
; GCN-NEXT:    ret half [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast ogt half %elt1, %elt0
  %max1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast ogt half %elt2, %max1
  %max2 = select i1 %cmp2, half %elt2, half %max1
  %cmp3 = fcmp fast ogt half %elt3, %max2
  %max3 = select i1 %cmp3, half %elt3, half %max2

  ret half %max3
}

; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
define half @reduction_fmin_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmin_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]]
; GCN-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]]
; GCN-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]]
; GCN-NEXT:    ret half [[MIN3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast olt half %elt1, %elt0
  %min1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast olt half %elt2, %min1
  %min2 = select i1 %cmp2, half %elt2, half %min1
  %cmp3 = fcmp fast olt half %elt3, %min2
  %min3 = select i1 %cmp3, half %elt3, half %min2

  ret half %min3
}

; Tests to make sure reduction does not kick in. vega does not support packed math for types larger than 16 bits.
define float @reduction_v4float(<4 x float> %a) {
; GCN-LABEL: @reduction_v4float(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[A]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x float> [[A]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x float> [[A]], i64 3
; GCN-NEXT:    [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[ADD2:%.*]] = fadd fast float [[ELT2]], [[ADD1]]
; GCN-NEXT:    [[ADD3:%.*]] = fadd fast float [[ELT3]], [[ADD2]]
; GCN-NEXT:    ret float [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x float> %a, i64 0
  %elt1 = extractelement <4 x float> %a, i64 1
  %elt2 = extractelement <4 x float> %a, i64 2
  %elt3 = extractelement <4 x float> %a, i64 3

  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2

  ret float %add3
}
