1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,VI %s
4
; Sums the four lanes of a <4 x half> with a linear chain of scalar
; `fadd fast` instructions.
; Expected output per the checks below:
;   * gfx900 run - the chain becomes two shufflevector + vector `fadd fast`
;     steps on <4 x half>, followed by an extract of lane 0.
;   * fiji run   - the scalar extract/fadd chain is left unchanged.
5define half @reduction_half4(<4 x half> %a) {
6; GFX9-LABEL: @reduction_half4(
7; GFX9-NEXT:  entry:
8; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
9; GFX9-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x half> [[A]], [[RDX_SHUF]]
10; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x half> [[BIN_RDX]], <4 x half> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
11; GFX9-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x half> [[BIN_RDX]], [[RDX_SHUF1]]
12; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x half> [[BIN_RDX2]], i32 0
13; GFX9-NEXT:    ret half [[TMP0]]
14;
15; VI-LABEL: @reduction_half4(
16; VI-NEXT:  entry:
17; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
18; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1
19; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2
20; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3
21; VI-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
22; VI-NEXT:    [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
23; VI-NEXT:    [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
24; VI-NEXT:    ret half [[ADD3]]
25;
26entry:
  ; Pull every lane of %a out as a scalar.
27  %elt0 = extractelement <4 x half> %a, i64 0
28  %elt1 = extractelement <4 x half> %a, i64 1
29  %elt2 = extractelement <4 x half> %a, i64 2
30  %elt3 = extractelement <4 x half> %a, i64 3
31
  ; Linear accumulation: each fadd consumes the next lane and the running sum.
32  %add1 = fadd fast half %elt1, %elt0
33  %add2 = fadd fast half %elt2, %add1
34  %add3 = fadd fast half %elt3, %add2
35
36  ret half %add3
37}
38
; Sums the eight lanes of an <8 x half> with a linear chain of scalar
; `fadd fast` instructions.
; Expected output per the checks below:
;   * gfx900 run - three shufflevector + vector `fadd fast` steps on
;     <8 x half>, followed by an extract of lane 0.
;   * fiji run   - the scalar extract/fadd chain is left unchanged.
39define half @reduction_half8(<8 x half> %vec8) {
40; GFX9-LABEL: @reduction_half8(
41; GFX9-NEXT:  entry:
42; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x half> [[VEC8:%.*]], <8 x half> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
43; GFX9-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x half> [[VEC8]], [[RDX_SHUF]]
44; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x half> [[BIN_RDX]], <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
45; GFX9-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x half> [[BIN_RDX]], [[RDX_SHUF1]]
46; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x half> [[BIN_RDX2]], <8 x half> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
47; GFX9-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x half> [[BIN_RDX2]], [[RDX_SHUF3]]
48; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <8 x half> [[BIN_RDX4]], i32 0
49; GFX9-NEXT:    ret half [[TMP0]]
50;
51; VI-LABEL: @reduction_half8(
52; VI-NEXT:  entry:
53; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x half> [[VEC8:%.*]], i64 0
54; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
55; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
56; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
57; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
58; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
59; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
60; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
61; VI-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
62; VI-NEXT:    [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
63; VI-NEXT:    [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
64; VI-NEXT:    [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]]
65; VI-NEXT:    [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]]
66; VI-NEXT:    [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]]
67; VI-NEXT:    [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]]
68; VI-NEXT:    ret half [[ADD7]]
69;
70entry:
  ; Pull every lane of %vec8 out as a scalar.
71  %elt0 = extractelement <8 x half> %vec8, i64 0
72  %elt1 = extractelement <8 x half> %vec8, i64 1
73  %elt2 = extractelement <8 x half> %vec8, i64 2
74  %elt3 = extractelement <8 x half> %vec8, i64 3
75  %elt4 = extractelement <8 x half> %vec8, i64 4
76  %elt5 = extractelement <8 x half> %vec8, i64 5
77  %elt6 = extractelement <8 x half> %vec8, i64 6
78  %elt7 = extractelement <8 x half> %vec8, i64 7
79
  ; Linear accumulation: each fadd consumes the next lane and the running sum.
80  %add1 = fadd fast half %elt1, %elt0
81  %add2 = fadd fast half %elt2, %add1
82  %add3 = fadd fast half %elt3, %add2
83  %add4 = fadd fast half %elt4, %add3
84  %add5 = fadd fast half %elt5, %add4
85  %add6 = fadd fast half %elt6, %add5
86  %add7 = fadd fast half %elt7, %add6
87
88  ret half %add7
89}
90
; Sums the sixteen lanes of a <16 x half> with a linear chain of scalar
; `fadd fast` instructions.
; Expected output per the checks below:
;   * gfx900 run - four shufflevector + vector `fadd fast` steps on
;     <16 x half>, followed by an extract of lane 0.
;   * fiji run   - the scalar extract/fadd chain is left unchanged.
91define half @reduction_half16(<16 x half> %vec16) {
92; GFX9-LABEL: @reduction_half16(
93; GFX9-NEXT:  entry:
94; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
95; GFX9-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x half> [[VEC16]], [[RDX_SHUF]]
96; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x half> [[BIN_RDX]], <16 x half> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
97; GFX9-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <16 x half> [[BIN_RDX]], [[RDX_SHUF1]]
98; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <16 x half> [[BIN_RDX2]], <16 x half> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
99; GFX9-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <16 x half> [[BIN_RDX2]], [[RDX_SHUF3]]
100; GFX9-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x half> [[BIN_RDX4]], <16 x half> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
101; GFX9-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x half> [[BIN_RDX4]], [[RDX_SHUF5]]
102; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <16 x half> [[BIN_RDX6]], i32 0
103; GFX9-NEXT:    ret half [[TMP0]]
104;
105; VI-LABEL: @reduction_half16(
106; VI-NEXT:  entry:
107; VI-NEXT:    [[ELT0:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 0
108; VI-NEXT:    [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1
109; VI-NEXT:    [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2
110; VI-NEXT:    [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3
111; VI-NEXT:    [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
112; VI-NEXT:    [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
113; VI-NEXT:    [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
114; VI-NEXT:    [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
115; VI-NEXT:    [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8
116; VI-NEXT:    [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
117; VI-NEXT:    [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
118; VI-NEXT:    [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
119; VI-NEXT:    [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
120; VI-NEXT:    [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
121; VI-NEXT:    [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
122; VI-NEXT:    [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
123; VI-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
124; VI-NEXT:    [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
125; VI-NEXT:    [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
126; VI-NEXT:    [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]]
127; VI-NEXT:    [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]]
128; VI-NEXT:    [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]]
129; VI-NEXT:    [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]]
130; VI-NEXT:    [[ADD8:%.*]] = fadd fast half [[ELT8]], [[ADD7]]
131; VI-NEXT:    [[ADD9:%.*]] = fadd fast half [[ELT9]], [[ADD8]]
132; VI-NEXT:    [[ADD10:%.*]] = fadd fast half [[ELT10]], [[ADD9]]
133; VI-NEXT:    [[ADD11:%.*]] = fadd fast half [[ELT11]], [[ADD10]]
134; VI-NEXT:    [[ADD12:%.*]] = fadd fast half [[ELT12]], [[ADD11]]
135; VI-NEXT:    [[ADD13:%.*]] = fadd fast half [[ELT13]], [[ADD12]]
136; VI-NEXT:    [[ADD14:%.*]] = fadd fast half [[ELT14]], [[ADD13]]
137; VI-NEXT:    [[ADD15:%.*]] = fadd fast half [[ELT15]], [[ADD14]]
138; VI-NEXT:    ret half [[ADD15]]
139;
140entry:
  ; Pull every lane of %vec16 out as a scalar.
141  %elt0 = extractelement <16 x half> %vec16, i64 0
142  %elt1 = extractelement <16 x half> %vec16, i64 1
143  %elt2 = extractelement <16 x half> %vec16, i64 2
144  %elt3 = extractelement <16 x half> %vec16, i64 3
145  %elt4 = extractelement <16 x half> %vec16, i64 4
146  %elt5 = extractelement <16 x half> %vec16, i64 5
147  %elt6 = extractelement <16 x half> %vec16, i64 6
148  %elt7 = extractelement <16 x half> %vec16, i64 7
149  %elt8 = extractelement <16 x half> %vec16, i64 8
150  %elt9 = extractelement <16 x half> %vec16, i64 9
151  %elt10 = extractelement <16 x half> %vec16, i64 10
152  %elt11 = extractelement <16 x half> %vec16, i64 11
153  %elt12 = extractelement <16 x half> %vec16, i64 12
154  %elt13 = extractelement <16 x half> %vec16, i64 13
155  %elt14 = extractelement <16 x half> %vec16, i64 14
156  %elt15 = extractelement <16 x half> %vec16, i64 15
157
  ; Linear accumulation: each fadd consumes the next lane and the running sum.
158  %add1 = fadd fast half %elt1, %elt0
159  %add2 = fadd fast half %elt2, %add1
160  %add3 = fadd fast half %elt3, %add2
161  %add4 = fadd fast half %elt4, %add3
162  %add5 = fadd fast half %elt5, %add4
163  %add6 = fadd fast half %elt6, %add5
164  %add7 = fadd fast half %elt7, %add6
165  %add8 = fadd fast half %elt8, %add7
166  %add9 = fadd fast half %elt9, %add8
167  %add10 = fadd fast half %elt10, %add9
168  %add11 = fadd fast half %elt11, %add10
169  %add12 = fadd fast half %elt12, %add11
170  %add13 = fadd fast half %elt13, %add12
171  %add14 = fadd fast half %elt14, %add13
172  %add15 = fadd fast half %elt15, %add14
173
174  ret half %add15
175}
176
177; FIXME: support vectorization of this fsub chain; both runs currently leave it scalar.
; Chains `fsub fast` over the four lanes of a <4 x half>, using the same
; shape as the fadd tests: each step subtracts the running result from the
; next lane. Both runs share one set of checks here and expect the scalar
; code to come out unchanged.
178define half @reduction_sub_half4(<4 x half> %a) {
179; GCN-LABEL: @reduction_sub_half4(
180; GCN-NEXT:  entry:
181; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
182; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1
183; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2
184; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3
185; GCN-NEXT:    [[ADD1:%.*]] = fsub fast half [[ELT1]], [[ELT0]]
186; GCN-NEXT:    [[ADD2:%.*]] = fsub fast half [[ELT2]], [[ADD1]]
187; GCN-NEXT:    [[ADD3:%.*]] = fsub fast half [[ELT3]], [[ADD2]]
188; GCN-NEXT:    ret half [[ADD3]]
189;
190entry:
  ; Pull every lane of %a out as a scalar.
191  %elt0 = extractelement <4 x half> %a, i64 0
192  %elt1 = extractelement <4 x half> %a, i64 1
193  %elt2 = extractelement <4 x half> %a, i64 2
194  %elt3 = extractelement <4 x half> %a, i64 3
195
  ; Despite the %addN names these are subtractions: the new lane is the
  ; minuend and the previous result is the subtrahend.
196  %add1 = fsub fast half %elt1, %elt0
197  %add2 = fsub fast half %elt2, %add1
198  %add3 = fsub fast half %elt3, %add2
199
200  ret half %add3
201}
202
; Sums the four lanes of a <4 x i16> with a linear chain of scalar `add`
; instructions (no wrap flags).
; Expected output per the checks below:
;   * gfx900 run - two shufflevector + vector `add` steps on <4 x i16>,
;     followed by an extract of lane 0.
;   * fiji run   - the scalar extract/add chain is left unchanged.
203define i16 @reduction_v4i16(<4 x i16> %a) {
204; GFX9-LABEL: @reduction_v4i16(
205; GFX9-NEXT:  entry:
206; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
207; GFX9-NEXT:    [[BIN_RDX:%.*]] = add <4 x i16> [[A]], [[RDX_SHUF]]
208; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[BIN_RDX]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
209; GFX9-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i16> [[BIN_RDX]], [[RDX_SHUF1]]
210; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[BIN_RDX2]], i32 0
211; GFX9-NEXT:    ret i16 [[TMP0]]
212;
213; VI-LABEL: @reduction_v4i16(
214; VI-NEXT:  entry:
215; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
216; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[A]], i64 1
217; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[A]], i64 2
218; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[A]], i64 3
219; VI-NEXT:    [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]]
220; VI-NEXT:    [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]]
221; VI-NEXT:    [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]]
222; VI-NEXT:    ret i16 [[ADD3]]
223;
224entry:
  ; Pull every lane of %a out as a scalar.
225  %elt0 = extractelement <4 x i16> %a, i64 0
226  %elt1 = extractelement <4 x i16> %a, i64 1
227  %elt2 = extractelement <4 x i16> %a, i64 2
228  %elt3 = extractelement <4 x i16> %a, i64 3
229
  ; Linear accumulation: each add consumes the next lane and the running sum.
230  %add1 = add i16 %elt1, %elt0
231  %add2 = add i16 %elt2, %add1
232  %add3 = add i16 %elt3, %add2
233
234  ret i16 %add3
235}
236
; Sums the eight lanes of an <8 x i16> with a linear chain of scalar `add`
; instructions (no wrap flags).
; Expected output per the checks below:
;   * gfx900 run - three shufflevector + vector `add` steps on <8 x i16>,
;     followed by an extract of lane 0.
;   * fiji run   - the scalar extract/add chain is left unchanged.
237define i16 @reduction_v8i16(<8 x i16> %vec8) {
238; GFX9-LABEL: @reduction_v8i16(
239; GFX9-NEXT:  entry:
240; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
241; GFX9-NEXT:    [[BIN_RDX:%.*]] = add <8 x i16> [[VEC8]], [[RDX_SHUF]]
242; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
243; GFX9-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i16> [[BIN_RDX]], [[RDX_SHUF1]]
244; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX2]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
245; GFX9-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i16> [[BIN_RDX2]], [[RDX_SHUF3]]
246; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[BIN_RDX4]], i32 0
247; GFX9-NEXT:    ret i16 [[TMP0]]
248;
249; VI-LABEL: @reduction_v8i16(
250; VI-NEXT:  entry:
251; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0
252; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1
253; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2
254; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3
255; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4
256; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5
257; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6
258; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7
259; VI-NEXT:    [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]]
260; VI-NEXT:    [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]]
261; VI-NEXT:    [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]]
262; VI-NEXT:    [[ADD4:%.*]] = add i16 [[ELT4]], [[ADD3]]
263; VI-NEXT:    [[ADD5:%.*]] = add i16 [[ELT5]], [[ADD4]]
264; VI-NEXT:    [[ADD6:%.*]] = add i16 [[ELT6]], [[ADD5]]
265; VI-NEXT:    [[ADD7:%.*]] = add i16 [[ELT7]], [[ADD6]]
266; VI-NEXT:    ret i16 [[ADD7]]
267;
268entry:
  ; Pull every lane of %vec8 out as a scalar.
269  %elt0 = extractelement <8 x i16> %vec8, i64 0
270  %elt1 = extractelement <8 x i16> %vec8, i64 1
271  %elt2 = extractelement <8 x i16> %vec8, i64 2
272  %elt3 = extractelement <8 x i16> %vec8, i64 3
273  %elt4 = extractelement <8 x i16> %vec8, i64 4
274  %elt5 = extractelement <8 x i16> %vec8, i64 5
275  %elt6 = extractelement <8 x i16> %vec8, i64 6
276  %elt7 = extractelement <8 x i16> %vec8, i64 7
277
  ; Linear accumulation: each add consumes the next lane and the running sum.
278  %add1 = add i16 %elt1, %elt0
279  %add2 = add i16 %elt2, %add1
280  %add3 = add i16 %elt3, %add2
281  %add4 = add i16 %elt4, %add3
282  %add5 = add i16 %elt5, %add4
283  %add6 = add i16 %elt6, %add5
284  %add7 = add i16 %elt7, %add6
285
286  ret i16 %add7
287}
288
; Unsigned minimum of the four lanes of a <4 x i16>, written as a linear
; chain of `icmp ult` + `select` pairs.
; Expected output per the checks below:
;   * gfx900 run - two shufflevector + vector `icmp ult`/`select` steps on
;     <4 x i16>, followed by an extract of lane 0.
;   * fiji run   - the scalar compare/select chain is left unchanged.
289define i16 @reduction_umin_v4i16(<4 x i16> %vec4) {
290; GFX9-LABEL: @reduction_umin_v4i16(
291; GFX9-NEXT:  entry:
292; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
293; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <4 x i16> [[VEC4]], [[RDX_SHUF]]
294; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]]
295; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
296; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp ult <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
297; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]]
298; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0
299; GFX9-NEXT:    ret i16 [[TMP0]]
300;
301; VI-LABEL: @reduction_umin_v4i16(
302; VI-NEXT:  entry:
303; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
304; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
305; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
306; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
307; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
308; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
309; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
310; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MIN1]]
311; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
312; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MIN2]]
313; VI-NEXT:    ret i16 [[MIN3]]
314;
315entry:
  ; Pull every lane of %vec4 out as a scalar.
316  %elt0 = extractelement <4 x i16> %vec4, i64 0
317  %elt1 = extractelement <4 x i16> %vec4, i64 1
318  %elt2 = extractelement <4 x i16> %vec4, i64 2
319  %elt3 = extractelement <4 x i16> %vec4, i64 3
320
  ; Running unsigned minimum: compare the next lane against the current
  ; minimum and keep whichever is smaller.
321  %cmp1 = icmp ult i16 %elt1, %elt0
322  %min1 = select i1 %cmp1, i16 %elt1, i16 %elt0
323  %cmp2 = icmp ult i16 %elt2, %min1
324  %min2 = select i1 %cmp2, i16 %elt2, i16 %min1
325  %cmp3 = icmp ult i16 %elt3, %min2
326  %min3 = select i1 %cmp3, i16 %elt3, i16 %min2
327
328  ret i16 %min3
329}
330
; Unsigned minimum of the eight lanes of an <8 x i16>, written as a linear
; chain of `icmp ult` + `select` pairs. The function name only says "icmp",
; but the body is a umin reduction.
; Expected output per the checks below:
;   * gfx900 run - three shufflevector + vector `icmp ult`/`select` steps on
;     <8 x i16>, followed by an extract of lane 0.
;   * fiji run   - the scalar compare/select chain is left unchanged.
331define i16 @reduction_icmp_v8i16(<8 x i16> %vec8) {
332; GFX9-LABEL: @reduction_icmp_v8i16(
333; GFX9-NEXT:  entry:
334; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
335; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i16> [[VEC8]], [[RDX_SHUF]]
336; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i16> [[VEC8]], <8 x i16> [[RDX_SHUF]]
337; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
338; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
339; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i16> [[RDX_MINMAX_SELECT]], <8 x i16> [[RDX_SHUF1]]
340; GFX9-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT3]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
341; GFX9-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i16> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
342; GFX9-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i16> [[RDX_MINMAX_SELECT3]], <8 x i16> [[RDX_SHUF4]]
343; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[RDX_MINMAX_SELECT6]], i32 0
344; GFX9-NEXT:    ret i16 [[TMP0]]
345;
346; VI-LABEL: @reduction_icmp_v8i16(
347; VI-NEXT:  entry:
348; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0
349; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1
350; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2
351; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3
352; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4
353; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5
354; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6
355; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7
356; VI-NEXT:    [[CMP0:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
357; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
358; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
359; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
360; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
361; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
362; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT4]], [[MIN3]]
363; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
364; VI-NEXT:    [[CMP4:%.*]] = icmp ult i16 [[ELT5]], [[MIN4]]
365; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
366; VI-NEXT:    [[CMP5:%.*]] = icmp ult i16 [[ELT6]], [[MIN5]]
367; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
368; VI-NEXT:    [[CMP6:%.*]] = icmp ult i16 [[ELT7]], [[MIN6]]
369; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
370; VI-NEXT:    ret i16 [[MIN7]]
371;
372entry:
  ; Pull every lane of %vec8 out as a scalar.
373  %elt0 = extractelement <8 x i16> %vec8, i64 0
374  %elt1 = extractelement <8 x i16> %vec8, i64 1
375  %elt2 = extractelement <8 x i16> %vec8, i64 2
376  %elt3 = extractelement <8 x i16> %vec8, i64 3
377  %elt4 = extractelement <8 x i16> %vec8, i64 4
378  %elt5 = extractelement <8 x i16> %vec8, i64 5
379  %elt6 = extractelement <8 x i16> %vec8, i64 6
380  %elt7 = extractelement <8 x i16> %vec8, i64 7
381
  ; Running unsigned minimum: compare the next lane against the current
  ; minimum and keep whichever is smaller.
382  %cmp0 = icmp ult i16 %elt1, %elt0
383  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
384  %cmp1 = icmp ult i16 %elt2, %min1
385  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
386  %cmp2 = icmp ult i16 %elt3, %min2
387  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2
388
389  %cmp3 = icmp ult i16 %elt4, %min3
390  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
391  %cmp4 = icmp ult i16 %elt5, %min4
392  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4
393
394  %cmp5 = icmp ult i16 %elt6, %min5
395  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
396  %cmp6 = icmp ult i16 %elt7, %min6
397  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6
398
399  ret i16 %min7
400}
401
; Signed minimum of the sixteen lanes of a <16 x i16>, written as a linear
; chain of `icmp slt` + `select` pairs.
; Expected output per the checks below:
;   * gfx900 run - four shufflevector + vector `icmp slt`/`select` steps on
;     <16 x i16>, followed by an extract of lane 0.
;   * fiji run   - the scalar compare/select chain is left unchanged.
402define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
403; GFX9-LABEL: @reduction_smin_v16i16(
404; GFX9-NEXT:  entry:
405; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i16> [[VEC16:%.*]], <16 x i16> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
406; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <16 x i16> [[VEC16]], [[RDX_SHUF]]
407; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i16> [[VEC16]], <16 x i16> [[RDX_SHUF]]
408; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT]], <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
409; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
410; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i16> [[RDX_MINMAX_SELECT]], <16 x i16> [[RDX_SHUF1]]
411; GFX9-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT3]], <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
412; GFX9-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
413; GFX9-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i16> [[RDX_MINMAX_SELECT3]], <16 x i16> [[RDX_SHUF4]]
414; GFX9-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT6]], <16 x i16> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
415; GFX9-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
416; GFX9-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i16> [[RDX_MINMAX_SELECT6]], <16 x i16> [[RDX_SHUF7]]
417; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <16 x i16> [[RDX_MINMAX_SELECT9]], i32 0
418; GFX9-NEXT:    ret i16 [[TMP0]]
419;
420; VI-LABEL: @reduction_smin_v16i16(
421; VI-NEXT:  entry:
422; VI-NEXT:    [[ELT0:%.*]] = extractelement <16 x i16> [[VEC16:%.*]], i64 0
423; VI-NEXT:    [[ELT1:%.*]] = extractelement <16 x i16> [[VEC16]], i64 1
424; VI-NEXT:    [[ELT2:%.*]] = extractelement <16 x i16> [[VEC16]], i64 2
425; VI-NEXT:    [[ELT3:%.*]] = extractelement <16 x i16> [[VEC16]], i64 3
426; VI-NEXT:    [[ELT4:%.*]] = extractelement <16 x i16> [[VEC16]], i64 4
427; VI-NEXT:    [[ELT5:%.*]] = extractelement <16 x i16> [[VEC16]], i64 5
428; VI-NEXT:    [[ELT6:%.*]] = extractelement <16 x i16> [[VEC16]], i64 6
429; VI-NEXT:    [[ELT7:%.*]] = extractelement <16 x i16> [[VEC16]], i64 7
430; VI-NEXT:    [[ELT8:%.*]] = extractelement <16 x i16> [[VEC16]], i64 8
431; VI-NEXT:    [[ELT9:%.*]] = extractelement <16 x i16> [[VEC16]], i64 9
432; VI-NEXT:    [[ELT10:%.*]] = extractelement <16 x i16> [[VEC16]], i64 10
433; VI-NEXT:    [[ELT11:%.*]] = extractelement <16 x i16> [[VEC16]], i64 11
434; VI-NEXT:    [[ELT12:%.*]] = extractelement <16 x i16> [[VEC16]], i64 12
435; VI-NEXT:    [[ELT13:%.*]] = extractelement <16 x i16> [[VEC16]], i64 13
436; VI-NEXT:    [[ELT14:%.*]] = extractelement <16 x i16> [[VEC16]], i64 14
437; VI-NEXT:    [[ELT15:%.*]] = extractelement <16 x i16> [[VEC16]], i64 15
438; VI-NEXT:    [[CMP0:%.*]] = icmp slt i16 [[ELT1]], [[ELT0]]
439; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
440; VI-NEXT:    [[CMP1:%.*]] = icmp slt i16 [[ELT2]], [[MIN1]]
441; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
442; VI-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[ELT3]], [[MIN2]]
443; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
444; VI-NEXT:    [[CMP3:%.*]] = icmp slt i16 [[ELT4]], [[MIN3]]
445; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
446; VI-NEXT:    [[CMP4:%.*]] = icmp slt i16 [[ELT5]], [[MIN4]]
447; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
448; VI-NEXT:    [[CMP5:%.*]] = icmp slt i16 [[ELT6]], [[MIN5]]
449; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
450; VI-NEXT:    [[CMP6:%.*]] = icmp slt i16 [[ELT7]], [[MIN6]]
451; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
452; VI-NEXT:    [[CMP7:%.*]] = icmp slt i16 [[ELT8]], [[MIN7]]
453; VI-NEXT:    [[MIN8:%.*]] = select i1 [[CMP7]], i16 [[ELT8]], i16 [[MIN7]]
454; VI-NEXT:    [[CMP8:%.*]] = icmp slt i16 [[ELT9]], [[MIN8]]
455; VI-NEXT:    [[MIN9:%.*]] = select i1 [[CMP8]], i16 [[ELT9]], i16 [[MIN8]]
456; VI-NEXT:    [[CMP9:%.*]] = icmp slt i16 [[ELT10]], [[MIN9]]
457; VI-NEXT:    [[MIN10:%.*]] = select i1 [[CMP9]], i16 [[ELT10]], i16 [[MIN9]]
458; VI-NEXT:    [[CMP10:%.*]] = icmp slt i16 [[ELT11]], [[MIN10]]
459; VI-NEXT:    [[MIN11:%.*]] = select i1 [[CMP10]], i16 [[ELT11]], i16 [[MIN10]]
460; VI-NEXT:    [[CMP11:%.*]] = icmp slt i16 [[ELT12]], [[MIN11]]
461; VI-NEXT:    [[MIN12:%.*]] = select i1 [[CMP11]], i16 [[ELT12]], i16 [[MIN11]]
462; VI-NEXT:    [[CMP12:%.*]] = icmp slt i16 [[ELT13]], [[MIN12]]
463; VI-NEXT:    [[MIN13:%.*]] = select i1 [[CMP12]], i16 [[ELT13]], i16 [[MIN12]]
464; VI-NEXT:    [[CMP13:%.*]] = icmp slt i16 [[ELT14]], [[MIN13]]
465; VI-NEXT:    [[MIN14:%.*]] = select i1 [[CMP13]], i16 [[ELT14]], i16 [[MIN13]]
466; VI-NEXT:    [[CMP14:%.*]] = icmp slt i16 [[ELT15]], [[MIN14]]
467; VI-NEXT:    [[MIN15:%.*]] = select i1 [[CMP14]], i16 [[ELT15]], i16 [[MIN14]]
468; VI-NEXT:    ret i16 [[MIN15]]
469;
470entry:
  ; Pull every lane of %vec16 out as a scalar.
471  %elt0 = extractelement <16 x i16> %vec16, i64 0
472  %elt1 = extractelement <16 x i16> %vec16, i64 1
473  %elt2 = extractelement <16 x i16> %vec16, i64 2
474  %elt3 = extractelement <16 x i16> %vec16, i64 3
475  %elt4 = extractelement <16 x i16> %vec16, i64 4
476  %elt5 = extractelement <16 x i16> %vec16, i64 5
477  %elt6 = extractelement <16 x i16> %vec16, i64 6
478  %elt7 = extractelement <16 x i16> %vec16, i64 7
479
480  %elt8 = extractelement <16 x i16> %vec16, i64 8
481  %elt9 = extractelement <16 x i16> %vec16, i64 9
482  %elt10 = extractelement <16 x i16> %vec16, i64 10
483  %elt11 = extractelement <16 x i16> %vec16, i64 11
484  %elt12 = extractelement <16 x i16> %vec16, i64 12
485  %elt13 = extractelement <16 x i16> %vec16, i64 13
486  %elt14 = extractelement <16 x i16> %vec16, i64 14
487  %elt15 = extractelement <16 x i16> %vec16, i64 15
488
  ; Running signed minimum: compare the next lane against the current
  ; minimum and keep whichever is smaller.
489  %cmp0 = icmp slt i16 %elt1, %elt0
490  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
491  %cmp1 = icmp slt i16 %elt2, %min1
492  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
493  %cmp2 = icmp slt i16 %elt3, %min2
494  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2
495
496  %cmp3 = icmp slt i16 %elt4, %min3
497  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
498  %cmp4 = icmp slt i16 %elt5, %min4
499  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4
500
501  %cmp5 = icmp slt i16 %elt6, %min5
502  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
503  %cmp6 = icmp slt i16 %elt7, %min6
504  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6
505
506  %cmp7 = icmp slt i16 %elt8, %min7
507  %min8 = select i1 %cmp7, i16 %elt8, i16 %min7
508  %cmp8 = icmp slt i16 %elt9, %min8
509  %min9 = select i1 %cmp8, i16 %elt9, i16 %min8
510
511  %cmp9 = icmp slt i16 %elt10, %min9
512  %min10 = select i1 %cmp9, i16 %elt10, i16 %min9
513  %cmp10 = icmp slt i16 %elt11, %min10
514  %min11 = select i1 %cmp10, i16 %elt11, i16 %min10
515
516  %cmp11 = icmp slt i16 %elt12, %min11
517  %min12 = select i1 %cmp11, i16 %elt12, i16 %min11
518  %cmp12 = icmp slt i16 %elt13, %min12
519  %min13 = select i1 %cmp12, i16 %elt13, i16 %min12
520
521  %cmp13 = icmp slt i16 %elt14, %min13
522  %min14 = select i1 %cmp13, i16 %elt14, i16 %min13
523  %cmp14 = icmp slt i16 %elt15, %min14
524  %min15 = select i1 %cmp14, i16 %elt15, i16 %min14
525
526
527  ret i16 %min15
528}
529
; Unsigned maximum of the four lanes of a <4 x i16>, written as a linear
; chain of `icmp ugt` + `select` pairs.
; Expected output per the checks below:
;   * gfx900 run - two shufflevector + vector `icmp ugt`/`select` steps on
;     <4 x i16>, followed by an extract of lane 0.
;   * fiji run   - the scalar compare/select chain is left unchanged.
530define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
531; GFX9-LABEL: @reduction_umax_v4i16(
532; GFX9-NEXT:  entry:
533; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
534; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i16> [[VEC4]], [[RDX_SHUF]]
535; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]]
536; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
537; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp ugt <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
538; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]]
539; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0
540; GFX9-NEXT:    ret i16 [[TMP0]]
541;
542; VI-LABEL: @reduction_umax_v4i16(
543; VI-NEXT:  entry:
544; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
545; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
546; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
547; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
548; VI-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[ELT1]], [[ELT0]]
549; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
550; VI-NEXT:    [[CMP2:%.*]] = icmp ugt i16 [[ELT2]], [[MAX1]]
551; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
552; VI-NEXT:    [[CMP3:%.*]] = icmp ugt i16 [[ELT3]], [[MAX2]]
553; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
554; VI-NEXT:    ret i16 [[MAX3]]
555;
556entry:
  ; Pull every lane of %vec4 out as a scalar.
557  %elt0 = extractelement <4 x i16> %vec4, i64 0
558  %elt1 = extractelement <4 x i16> %vec4, i64 1
559  %elt2 = extractelement <4 x i16> %vec4, i64 2
560  %elt3 = extractelement <4 x i16> %vec4, i64 3
561
  ; Running unsigned maximum: compare the next lane against the current
  ; maximum and keep whichever is larger.
562  %cmp1 = icmp ugt i16 %elt1, %elt0
563  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
564  %cmp2 = icmp ugt i16 %elt2, %max1
565  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
566  %cmp3 = icmp ugt i16 %elt3, %max2
567  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2
568
569  ret i16 %max3
570}
571
572define i16 @reduction_smax_v4i16(<4 x i16> %vec4) { ; Signed-max reduction over the four i16 lanes of %vec4, written as a scalar chain.
573; GFX9-LABEL: @reduction_smax_v4i16(
574; GFX9-NEXT:  entry:
575; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
576; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i16> [[VEC4]], [[RDX_SHUF]]
577; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]]
578; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
579; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
580; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]]
581; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0
582; GFX9-NEXT:    ret i16 [[TMP0]]
583;
584; VI-LABEL: @reduction_smax_v4i16(
585; VI-NEXT:  entry:
586; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
587; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
588; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
589; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
590; VI-NEXT:    [[CMP1:%.*]] = icmp sgt i16 [[ELT1]], [[ELT0]]
591; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
592; VI-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[ELT2]], [[MAX1]]
593; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
594; VI-NEXT:    [[CMP3:%.*]] = icmp sgt i16 [[ELT3]], [[MAX2]]
595; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
596; VI-NEXT:    ret i16 [[MAX3]]
597;
598entry:
599  %elt0 = extractelement <4 x i16> %vec4, i64 0
600  %elt1 = extractelement <4 x i16> %vec4, i64 1
601  %elt2 = extractelement <4 x i16> %vec4, i64 2
602  %elt3 = extractelement <4 x i16> %vec4, i64 3
603  ; Serial icmp sgt + select chain. The gfx900 assertions above expect it rewritten as a shuffle-based <4 x i16> reduction; the fiji assertions expect it unchanged.
604  %cmp1 = icmp sgt i16 %elt1, %elt0
605  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
606  %cmp2 = icmp sgt i16 %elt2, %max1
607  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
608  %cmp3 = icmp sgt i16 %elt3, %max2
609  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2
610  ; %max3 is the output of the last select, i.e. the signed maximum of the four lanes.
611  ret i16 %max3
612}
613
614; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
615; with fastmath on the select.
616define half @reduction_fmax_v4half(<4 x half> %vec4) { ; Max over the four half lanes of %vec4, written as an fcmp ogt + select chain.
617; GCN-LABEL: @reduction_fmax_v4half(
618; GCN-NEXT:  entry:
619; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
620; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
621; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
622; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
623; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]]
624; GCN-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
625; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]]
626; GCN-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]]
627; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]]
628; GCN-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]]
629; GCN-NEXT:    ret half [[MAX3]]
630;
631entry:
632  %elt0 = extractelement <4 x half> %vec4, i64 0
633  %elt1 = extractelement <4 x half> %vec4, i64 1
634  %elt2 = extractelement <4 x half> %vec4, i64 2
635  %elt3 = extractelement <4 x half> %vec4, i64 3
636  ; Only the fcmp instructions carry the fast flag; the selects have no fast-math flags. The shared assertions above expect this chain to stay scalar on both run lines.
637  %cmp1 = fcmp fast ogt half %elt1, %elt0
638  %max1 = select i1 %cmp1, half %elt1, half %elt0
639  %cmp2 = fcmp fast ogt half %elt2, %max1
640  %max2 = select i1 %cmp2, half %elt2, half %max1
641  %cmp3 = fcmp fast ogt half %elt3, %max2
642  %max3 = select i1 %cmp3, half %elt3, half %max2
643  ; %max3 is the output of the last select in the chain.
644  ret half %max3
645}
646
647; FIXME: Use fminnum intrinsics to match what InstCombine creates for fcmp+select
648; with fastmath on the select.
649define half @reduction_fmin_v4half(<4 x half> %vec4) { ; Min over the four half lanes of %vec4, written as an fcmp olt + select chain.
650; GCN-LABEL: @reduction_fmin_v4half(
651; GCN-NEXT:  entry:
652; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
653; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
654; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
655; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
656; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]]
657; GCN-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
658; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]]
659; GCN-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]]
660; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]]
661; GCN-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]]
662; GCN-NEXT:    ret half [[MIN3]]
663;
664entry:
665  %elt0 = extractelement <4 x half> %vec4, i64 0
666  %elt1 = extractelement <4 x half> %vec4, i64 1
667  %elt2 = extractelement <4 x half> %vec4, i64 2
668  %elt3 = extractelement <4 x half> %vec4, i64 3
669  ; Only the fcmp instructions carry the fast flag; the selects have no fast-math flags. The shared assertions above expect this chain to stay scalar on both run lines.
670  %cmp1 = fcmp fast olt half %elt1, %elt0
671  %min1 = select i1 %cmp1, half %elt1, half %elt0
672  %cmp2 = fcmp fast olt half %elt2, %min1
673  %min2 = select i1 %cmp2, half %elt2, half %min1
674  %cmp3 = fcmp fast olt half %elt3, %min2
675  %min3 = select i1 %cmp3, half %elt3, half %min2
676  ; %min3 is the output of the last select in the chain.
677  ret half %min3
678}
679
680; Tests to make sure the reduction does not kick in: Vega does not support packed math for types larger than 16 bits.
681define float @reduction_v4float(<4 x float> %a) { ; Sum of the four float lanes of %a, written as a serial fadd fast chain.
682; GCN-LABEL: @reduction_v4float(
683; GCN-NEXT:  entry:
684; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
685; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[A]], i64 1
686; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x float> [[A]], i64 2
687; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x float> [[A]], i64 3
688; GCN-NEXT:    [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
689; GCN-NEXT:    [[ADD2:%.*]] = fadd fast float [[ELT2]], [[ADD1]]
690; GCN-NEXT:    [[ADD3:%.*]] = fadd fast float [[ELT3]], [[ADD2]]
691; GCN-NEXT:    ret float [[ADD3]]
692;
693entry:
694  %elt0 = extractelement <4 x float> %a, i64 0
695  %elt1 = extractelement <4 x float> %a, i64 1
696  %elt2 = extractelement <4 x float> %a, i64 2
697  %elt3 = extractelement <4 x float> %a, i64 3
698  ; The shared assertions above expect these 32-bit adds to stay scalar on both run lines.
699  %add1 = fadd fast float %elt1, %elt0
700  %add2 = fadd fast float %elt2, %add1
701  %add3 = fadd fast float %elt3, %add2
702  ; %add3 accumulates all four lanes.
703  ret float %add3
704}
705