1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s 3; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,VI %s 4 5define half @reduction_half4(<4 x half> %a) { 6; GFX9-LABEL: @reduction_half4( 7; GFX9-NEXT: entry: 8; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 9; GFX9-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x half> [[A]], [[RDX_SHUF]] 10; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x half> [[BIN_RDX]], <4 x half> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 11; GFX9-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x half> [[BIN_RDX]], [[RDX_SHUF1]] 12; GFX9-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[BIN_RDX2]], i32 0 13; GFX9-NEXT: ret half [[TMP0]] 14; 15; VI-LABEL: @reduction_half4( 16; VI-NEXT: entry: 17; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 18; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1 19; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2 20; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3 21; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] 22; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] 23; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] 24; VI-NEXT: ret half [[ADD3]] 25; 26entry: 27 %elt0 = extractelement <4 x half> %a, i64 0 28 %elt1 = extractelement <4 x half> %a, i64 1 29 %elt2 = extractelement <4 x half> %a, i64 2 30 %elt3 = extractelement <4 x half> %a, i64 3 31 32 %add1 = fadd fast half %elt1, %elt0 33 %add2 = fadd fast half %elt2, %add1 34 %add3 = fadd fast half %elt3, %add2 35 36 ret half %add3 37} 38 39define half @reduction_half8(<8 x half> %vec8) { 40; GFX9-LABEL: @reduction_half8( 41; GFX9-NEXT: entry: 42; GFX9-NEXT: [[RDX_SHUF:%.*]] = 
shufflevector <8 x half> [[VEC8:%.*]], <8 x half> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 43; GFX9-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x half> [[VEC8]], [[RDX_SHUF]] 44; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x half> [[BIN_RDX]], <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 45; GFX9-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x half> [[BIN_RDX]], [[RDX_SHUF1]] 46; GFX9-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x half> [[BIN_RDX2]], <8 x half> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 47; GFX9-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x half> [[BIN_RDX2]], [[RDX_SHUF3]] 48; GFX9-NEXT: [[TMP0:%.*]] = extractelement <8 x half> [[BIN_RDX4]], i32 0 49; GFX9-NEXT: ret half [[TMP0]] 50; 51; VI-LABEL: @reduction_half8( 52; VI-NEXT: entry: 53; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8:%.*]], i64 0 54; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1 55; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2 56; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3 57; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4 58; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5 59; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6 60; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7 61; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] 62; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] 63; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] 64; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]] 65; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]] 66; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]] 67; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]] 68; VI-NEXT: ret half [[ADD7]] 69; 70entry: 71 %elt0 = extractelement <8 x half> 
%vec8, i64 0 72 %elt1 = extractelement <8 x half> %vec8, i64 1 73 %elt2 = extractelement <8 x half> %vec8, i64 2 74 %elt3 = extractelement <8 x half> %vec8, i64 3 75 %elt4 = extractelement <8 x half> %vec8, i64 4 76 %elt5 = extractelement <8 x half> %vec8, i64 5 77 %elt6 = extractelement <8 x half> %vec8, i64 6 78 %elt7 = extractelement <8 x half> %vec8, i64 7 79 80 %add1 = fadd fast half %elt1, %elt0 81 %add2 = fadd fast half %elt2, %add1 82 %add3 = fadd fast half %elt3, %add2 83 %add4 = fadd fast half %elt4, %add3 84 %add5 = fadd fast half %elt5, %add4 85 %add6 = fadd fast half %elt6, %add5 86 %add7 = fadd fast half %elt7, %add6 87 88 ret half %add7 89} 90 91define half @reduction_half16(<16 x half> %vec16) { 92; GFX9-LABEL: @reduction_half16( 93; GFX9-NEXT: entry: 94; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 95; GFX9-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x half> [[VEC16]], [[RDX_SHUF]] 96; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x half> [[BIN_RDX]], <16 x half> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 97; GFX9-NEXT: [[BIN_RDX2:%.*]] = fadd fast <16 x half> [[BIN_RDX]], [[RDX_SHUF1]] 98; GFX9-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x half> [[BIN_RDX2]], <16 x half> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 99; GFX9-NEXT: [[BIN_RDX4:%.*]] = fadd fast <16 x half> [[BIN_RDX2]], [[RDX_SHUF3]] 100; GFX9-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x half> [[BIN_RDX4]], <16 x half> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 101; GFX9-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x half> [[BIN_RDX4]], [[RDX_SHUF5]] 102; GFX9-NEXT: [[TMP0:%.*]] = extractelement <16 x half> [[BIN_RDX6]], i32 0 103; GFX9-NEXT: ret half [[TMP0]] 104; 105; VI-LABEL: @reduction_half16( 106; VI-NEXT: entry: 107; VI-NEXT: [[ELT0:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 0 108; VI-NEXT: [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1 109; VI-NEXT: [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2 110; VI-NEXT: [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3 111; VI-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4 112; VI-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5 113; VI-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6 114; VI-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7 115; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8 116; VI-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9 117; VI-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10 118; VI-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11 119; VI-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12 120; VI-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13 121; VI-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14 122; VI-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15 123; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] 124; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] 125; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] 126; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]] 127; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]] 128; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]] 129; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]] 130; 
VI-NEXT: [[ADD8:%.*]] = fadd fast half [[ELT8]], [[ADD7]] 131; VI-NEXT: [[ADD9:%.*]] = fadd fast half [[ELT9]], [[ADD8]] 132; VI-NEXT: [[ADD10:%.*]] = fadd fast half [[ELT10]], [[ADD9]] 133; VI-NEXT: [[ADD11:%.*]] = fadd fast half [[ELT11]], [[ADD10]] 134; VI-NEXT: [[ADD12:%.*]] = fadd fast half [[ELT12]], [[ADD11]] 135; VI-NEXT: [[ADD13:%.*]] = fadd fast half [[ELT13]], [[ADD12]] 136; VI-NEXT: [[ADD14:%.*]] = fadd fast half [[ELT14]], [[ADD13]] 137; VI-NEXT: [[ADD15:%.*]] = fadd fast half [[ELT15]], [[ADD14]] 138; VI-NEXT: ret half [[ADD15]] 139; 140entry: 141 %elt0 = extractelement <16 x half> %vec16, i64 0 142 %elt1 = extractelement <16 x half> %vec16, i64 1 143 %elt2 = extractelement <16 x half> %vec16, i64 2 144 %elt3 = extractelement <16 x half> %vec16, i64 3 145 %elt4 = extractelement <16 x half> %vec16, i64 4 146 %elt5 = extractelement <16 x half> %vec16, i64 5 147 %elt6 = extractelement <16 x half> %vec16, i64 6 148 %elt7 = extractelement <16 x half> %vec16, i64 7 149 %elt8 = extractelement <16 x half> %vec16, i64 8 150 %elt9 = extractelement <16 x half> %vec16, i64 9 151 %elt10 = extractelement <16 x half> %vec16, i64 10 152 %elt11 = extractelement <16 x half> %vec16, i64 11 153 %elt12 = extractelement <16 x half> %vec16, i64 12 154 %elt13 = extractelement <16 x half> %vec16, i64 13 155 %elt14 = extractelement <16 x half> %vec16, i64 14 156 %elt15 = extractelement <16 x half> %vec16, i64 15 157 158 %add1 = fadd fast half %elt1, %elt0 159 %add2 = fadd fast half %elt2, %add1 160 %add3 = fadd fast half %elt3, %add2 161 %add4 = fadd fast half %elt4, %add3 162 %add5 = fadd fast half %elt5, %add4 163 %add6 = fadd fast half %elt6, %add5 164 %add7 = fadd fast half %elt7, %add6 165 %add8 = fadd fast half %elt8, %add7 166 %add9 = fadd fast half %elt9, %add8 167 %add10 = fadd fast half %elt10, %add9 168 %add11 = fadd fast half %elt11, %add10 169 %add12 = fadd fast half %elt12, %add11 170 %add13 = fadd fast half %elt13, %add12 171 %add14 = fadd fast half %elt14, 
%add13 172 %add15 = fadd fast half %elt15, %add14 173 174 ret half %add15 175} 176 177; FIXME: support vectorization; 178define half @reduction_sub_half4(<4 x half> %a) { 179; GCN-LABEL: @reduction_sub_half4( 180; GCN-NEXT: entry: 181; GCN-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 182; GCN-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1 183; GCN-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2 184; GCN-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3 185; GCN-NEXT: [[ADD1:%.*]] = fsub fast half [[ELT1]], [[ELT0]] 186; GCN-NEXT: [[ADD2:%.*]] = fsub fast half [[ELT2]], [[ADD1]] 187; GCN-NEXT: [[ADD3:%.*]] = fsub fast half [[ELT3]], [[ADD2]] 188; GCN-NEXT: ret half [[ADD3]] 189; 190entry: 191 %elt0 = extractelement <4 x half> %a, i64 0 192 %elt1 = extractelement <4 x half> %a, i64 1 193 %elt2 = extractelement <4 x half> %a, i64 2 194 %elt3 = extractelement <4 x half> %a, i64 3 195 196 %add1 = fsub fast half %elt1, %elt0 197 %add2 = fsub fast half %elt2, %add1 198 %add3 = fsub fast half %elt3, %add2 199 200 ret half %add3 201} 202 203define i16 @reduction_v4i16(<4 x i16> %a) { 204; GFX9-LABEL: @reduction_v4i16( 205; GFX9-NEXT: entry: 206; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 207; GFX9-NEXT: [[BIN_RDX:%.*]] = add <4 x i16> [[A]], [[RDX_SHUF]] 208; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[BIN_RDX]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 209; GFX9-NEXT: [[BIN_RDX2:%.*]] = add <4 x i16> [[BIN_RDX]], [[RDX_SHUF1]] 210; GFX9-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[BIN_RDX2]], i32 0 211; GFX9-NEXT: ret i16 [[TMP0]] 212; 213; VI-LABEL: @reduction_v4i16( 214; VI-NEXT: entry: 215; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 216; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[A]], i64 1 217; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[A]], i64 2 
218; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[A]], i64 3 219; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]] 220; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]] 221; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]] 222; VI-NEXT: ret i16 [[ADD3]] 223; 224entry: 225 %elt0 = extractelement <4 x i16> %a, i64 0 226 %elt1 = extractelement <4 x i16> %a, i64 1 227 %elt2 = extractelement <4 x i16> %a, i64 2 228 %elt3 = extractelement <4 x i16> %a, i64 3 229 230 %add1 = add i16 %elt1, %elt0 231 %add2 = add i16 %elt2, %add1 232 %add3 = add i16 %elt3, %add2 233 234 ret i16 %add3 235} 236 237define i16 @reduction_v8i16(<8 x i16> %vec8) { 238; GFX9-LABEL: @reduction_v8i16( 239; GFX9-NEXT: entry: 240; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 241; GFX9-NEXT: [[BIN_RDX:%.*]] = add <8 x i16> [[VEC8]], [[RDX_SHUF]] 242; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 243; GFX9-NEXT: [[BIN_RDX2:%.*]] = add <8 x i16> [[BIN_RDX]], [[RDX_SHUF1]] 244; GFX9-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX2]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 245; GFX9-NEXT: [[BIN_RDX4:%.*]] = add <8 x i16> [[BIN_RDX2]], [[RDX_SHUF3]] 246; GFX9-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[BIN_RDX4]], i32 0 247; GFX9-NEXT: ret i16 [[TMP0]] 248; 249; VI-LABEL: @reduction_v8i16( 250; VI-NEXT: entry: 251; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0 252; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1 253; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2 254; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3 255; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4 
256; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5 257; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6 258; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7 259; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]] 260; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]] 261; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]] 262; VI-NEXT: [[ADD4:%.*]] = add i16 [[ELT4]], [[ADD3]] 263; VI-NEXT: [[ADD5:%.*]] = add i16 [[ELT5]], [[ADD4]] 264; VI-NEXT: [[ADD6:%.*]] = add i16 [[ELT6]], [[ADD5]] 265; VI-NEXT: [[ADD7:%.*]] = add i16 [[ELT7]], [[ADD6]] 266; VI-NEXT: ret i16 [[ADD7]] 267; 268entry: 269 %elt0 = extractelement <8 x i16> %vec8, i64 0 270 %elt1 = extractelement <8 x i16> %vec8, i64 1 271 %elt2 = extractelement <8 x i16> %vec8, i64 2 272 %elt3 = extractelement <8 x i16> %vec8, i64 3 273 %elt4 = extractelement <8 x i16> %vec8, i64 4 274 %elt5 = extractelement <8 x i16> %vec8, i64 5 275 %elt6 = extractelement <8 x i16> %vec8, i64 6 276 %elt7 = extractelement <8 x i16> %vec8, i64 7 277 278 %add1 = add i16 %elt1, %elt0 279 %add2 = add i16 %elt2, %add1 280 %add3 = add i16 %elt3, %add2 281 %add4 = add i16 %elt4, %add3 282 %add5 = add i16 %elt5, %add4 283 %add6 = add i16 %elt6, %add5 284 %add7 = add i16 %elt7, %add6 285 286 ret i16 %add7 287} 288 289define i16 @reduction_umin_v4i16(<4 x i16> %vec4) { 290; GFX9-LABEL: @reduction_umin_v4i16( 291; GFX9-NEXT: entry: 292; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 293; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <4 x i16> [[VEC4]], [[RDX_SHUF]] 294; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]] 295; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 296; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <4 x i16> 
[[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] 297; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]] 298; GFX9-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0 299; GFX9-NEXT: ret i16 [[TMP0]] 300; 301; VI-LABEL: @reduction_umin_v4i16( 302; VI-NEXT: entry: 303; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0 304; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1 305; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2 306; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3 307; VI-NEXT: [[CMP1:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]] 308; VI-NEXT: [[MIN1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]] 309; VI-NEXT: [[CMP2:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]] 310; VI-NEXT: [[MIN2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MIN1]] 311; VI-NEXT: [[CMP3:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]] 312; VI-NEXT: [[MIN3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MIN2]] 313; VI-NEXT: ret i16 [[MIN3]] 314; 315entry: 316 %elt0 = extractelement <4 x i16> %vec4, i64 0 317 %elt1 = extractelement <4 x i16> %vec4, i64 1 318 %elt2 = extractelement <4 x i16> %vec4, i64 2 319 %elt3 = extractelement <4 x i16> %vec4, i64 3 320 321 %cmp1 = icmp ult i16 %elt1, %elt0 322 %min1 = select i1 %cmp1, i16 %elt1, i16 %elt0 323 %cmp2 = icmp ult i16 %elt2, %min1 324 %min2 = select i1 %cmp2, i16 %elt2, i16 %min1 325 %cmp3 = icmp ult i16 %elt3, %min2 326 %min3 = select i1 %cmp3, i16 %elt3, i16 %min2 327 328 ret i16 %min3 329} 330 331define i16 @reduction_icmp_v8i16(<8 x i16> %vec8) { 332; GFX9-LABEL: @reduction_icmp_v8i16( 333; GFX9-NEXT: entry: 334; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 335; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i16> [[VEC8]], [[RDX_SHUF]] 336; GFX9-NEXT: 
[[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i16> [[VEC8]], <8 x i16> [[RDX_SHUF]] 337; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 338; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] 339; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i16> [[RDX_MINMAX_SELECT]], <8 x i16> [[RDX_SHUF1]] 340; GFX9-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT3]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 341; GFX9-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i16> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] 342; GFX9-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i16> [[RDX_MINMAX_SELECT3]], <8 x i16> [[RDX_SHUF4]] 343; GFX9-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[RDX_MINMAX_SELECT6]], i32 0 344; GFX9-NEXT: ret i16 [[TMP0]] 345; 346; VI-LABEL: @reduction_icmp_v8i16( 347; VI-NEXT: entry: 348; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0 349; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1 350; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2 351; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3 352; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4 353; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5 354; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6 355; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7 356; VI-NEXT: [[CMP0:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]] 357; VI-NEXT: [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]] 358; VI-NEXT: [[CMP1:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]] 359; VI-NEXT: [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]] 360; 
VI-NEXT: [[CMP2:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]] 361; VI-NEXT: [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]] 362; VI-NEXT: [[CMP3:%.*]] = icmp ult i16 [[ELT4]], [[MIN3]] 363; VI-NEXT: [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]] 364; VI-NEXT: [[CMP4:%.*]] = icmp ult i16 [[ELT5]], [[MIN4]] 365; VI-NEXT: [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]] 366; VI-NEXT: [[CMP5:%.*]] = icmp ult i16 [[ELT6]], [[MIN5]] 367; VI-NEXT: [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]] 368; VI-NEXT: [[CMP6:%.*]] = icmp ult i16 [[ELT7]], [[MIN6]] 369; VI-NEXT: [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]] 370; VI-NEXT: ret i16 [[MIN7]] 371; 372entry: 373 %elt0 = extractelement <8 x i16> %vec8, i64 0 374 %elt1 = extractelement <8 x i16> %vec8, i64 1 375 %elt2 = extractelement <8 x i16> %vec8, i64 2 376 %elt3 = extractelement <8 x i16> %vec8, i64 3 377 %elt4 = extractelement <8 x i16> %vec8, i64 4 378 %elt5 = extractelement <8 x i16> %vec8, i64 5 379 %elt6 = extractelement <8 x i16> %vec8, i64 6 380 %elt7 = extractelement <8 x i16> %vec8, i64 7 381 382 %cmp0 = icmp ult i16 %elt1, %elt0 383 %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0 384 %cmp1 = icmp ult i16 %elt2, %min1 385 %min2 = select i1 %cmp1, i16 %elt2, i16 %min1 386 %cmp2 = icmp ult i16 %elt3, %min2 387 %min3 = select i1 %cmp2, i16 %elt3, i16 %min2 388 389 %cmp3 = icmp ult i16 %elt4, %min3 390 %min4 = select i1 %cmp3, i16 %elt4, i16 %min3 391 %cmp4 = icmp ult i16 %elt5, %min4 392 %min5 = select i1 %cmp4, i16 %elt5, i16 %min4 393 394 %cmp5 = icmp ult i16 %elt6, %min5 395 %min6 = select i1 %cmp5, i16 %elt6, i16 %min5 396 %cmp6 = icmp ult i16 %elt7, %min6 397 %min7 = select i1 %cmp6, i16 %elt7, i16 %min6 398 399 ret i16 %min7 400} 401 402define i16 @reduction_smin_v16i16(<16 x i16> %vec16) { 403; GFX9-LABEL: @reduction_smin_v16i16( 404; GFX9-NEXT: entry: 405; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i16> [[VEC16:%.*]], <16 x i16> 
poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 406; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <16 x i16> [[VEC16]], [[RDX_SHUF]] 407; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i16> [[VEC16]], <16 x i16> [[RDX_SHUF]] 408; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT]], <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 409; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] 410; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i16> [[RDX_MINMAX_SELECT]], <16 x i16> [[RDX_SHUF1]] 411; GFX9-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT3]], <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 412; GFX9-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] 413; GFX9-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i16> [[RDX_MINMAX_SELECT3]], <16 x i16> [[RDX_SHUF4]] 414; GFX9-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT6]], <16 x i16> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 415; GFX9-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] 416; GFX9-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i16> [[RDX_MINMAX_SELECT6]], <16 x i16> [[RDX_SHUF7]] 417; GFX9-NEXT: [[TMP0:%.*]] = 
extractelement <16 x i16> [[RDX_MINMAX_SELECT9]], i32 0 418; GFX9-NEXT: ret i16 [[TMP0]] 419; 420; VI-LABEL: @reduction_smin_v16i16( 421; VI-NEXT: entry: 422; VI-NEXT: [[ELT0:%.*]] = extractelement <16 x i16> [[VEC16:%.*]], i64 0 423; VI-NEXT: [[ELT1:%.*]] = extractelement <16 x i16> [[VEC16]], i64 1 424; VI-NEXT: [[ELT2:%.*]] = extractelement <16 x i16> [[VEC16]], i64 2 425; VI-NEXT: [[ELT3:%.*]] = extractelement <16 x i16> [[VEC16]], i64 3 426; VI-NEXT: [[ELT4:%.*]] = extractelement <16 x i16> [[VEC16]], i64 4 427; VI-NEXT: [[ELT5:%.*]] = extractelement <16 x i16> [[VEC16]], i64 5 428; VI-NEXT: [[ELT6:%.*]] = extractelement <16 x i16> [[VEC16]], i64 6 429; VI-NEXT: [[ELT7:%.*]] = extractelement <16 x i16> [[VEC16]], i64 7 430; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x i16> [[VEC16]], i64 8 431; VI-NEXT: [[ELT9:%.*]] = extractelement <16 x i16> [[VEC16]], i64 9 432; VI-NEXT: [[ELT10:%.*]] = extractelement <16 x i16> [[VEC16]], i64 10 433; VI-NEXT: [[ELT11:%.*]] = extractelement <16 x i16> [[VEC16]], i64 11 434; VI-NEXT: [[ELT12:%.*]] = extractelement <16 x i16> [[VEC16]], i64 12 435; VI-NEXT: [[ELT13:%.*]] = extractelement <16 x i16> [[VEC16]], i64 13 436; VI-NEXT: [[ELT14:%.*]] = extractelement <16 x i16> [[VEC16]], i64 14 437; VI-NEXT: [[ELT15:%.*]] = extractelement <16 x i16> [[VEC16]], i64 15 438; VI-NEXT: [[CMP0:%.*]] = icmp slt i16 [[ELT1]], [[ELT0]] 439; VI-NEXT: [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]] 440; VI-NEXT: [[CMP1:%.*]] = icmp slt i16 [[ELT2]], [[MIN1]] 441; VI-NEXT: [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]] 442; VI-NEXT: [[CMP2:%.*]] = icmp slt i16 [[ELT3]], [[MIN2]] 443; VI-NEXT: [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]] 444; VI-NEXT: [[CMP3:%.*]] = icmp slt i16 [[ELT4]], [[MIN3]] 445; VI-NEXT: [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]] 446; VI-NEXT: [[CMP4:%.*]] = icmp slt i16 [[ELT5]], [[MIN4]] 447; VI-NEXT: [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 
[[MIN4]] 448; VI-NEXT: [[CMP5:%.*]] = icmp slt i16 [[ELT6]], [[MIN5]] 449; VI-NEXT: [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]] 450; VI-NEXT: [[CMP6:%.*]] = icmp slt i16 [[ELT7]], [[MIN6]] 451; VI-NEXT: [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]] 452; VI-NEXT: [[CMP7:%.*]] = icmp slt i16 [[ELT8]], [[MIN7]] 453; VI-NEXT: [[MIN8:%.*]] = select i1 [[CMP7]], i16 [[ELT8]], i16 [[MIN7]] 454; VI-NEXT: [[CMP8:%.*]] = icmp slt i16 [[ELT9]], [[MIN8]] 455; VI-NEXT: [[MIN9:%.*]] = select i1 [[CMP8]], i16 [[ELT9]], i16 [[MIN8]] 456; VI-NEXT: [[CMP9:%.*]] = icmp slt i16 [[ELT10]], [[MIN9]] 457; VI-NEXT: [[MIN10:%.*]] = select i1 [[CMP9]], i16 [[ELT10]], i16 [[MIN9]] 458; VI-NEXT: [[CMP10:%.*]] = icmp slt i16 [[ELT11]], [[MIN10]] 459; VI-NEXT: [[MIN11:%.*]] = select i1 [[CMP10]], i16 [[ELT11]], i16 [[MIN10]] 460; VI-NEXT: [[CMP11:%.*]] = icmp slt i16 [[ELT12]], [[MIN11]] 461; VI-NEXT: [[MIN12:%.*]] = select i1 [[CMP11]], i16 [[ELT12]], i16 [[MIN11]] 462; VI-NEXT: [[CMP12:%.*]] = icmp slt i16 [[ELT13]], [[MIN12]] 463; VI-NEXT: [[MIN13:%.*]] = select i1 [[CMP12]], i16 [[ELT13]], i16 [[MIN12]] 464; VI-NEXT: [[CMP13:%.*]] = icmp slt i16 [[ELT14]], [[MIN13]] 465; VI-NEXT: [[MIN14:%.*]] = select i1 [[CMP13]], i16 [[ELT14]], i16 [[MIN13]] 466; VI-NEXT: [[CMP14:%.*]] = icmp slt i16 [[ELT15]], [[MIN14]] 467; VI-NEXT: [[MIN15:%.*]] = select i1 [[CMP14]], i16 [[ELT15]], i16 [[MIN14]] 468; VI-NEXT: ret i16 [[MIN15]] 469; 470entry: 471 %elt0 = extractelement <16 x i16> %vec16, i64 0 472 %elt1 = extractelement <16 x i16> %vec16, i64 1 473 %elt2 = extractelement <16 x i16> %vec16, i64 2 474 %elt3 = extractelement <16 x i16> %vec16, i64 3 475 %elt4 = extractelement <16 x i16> %vec16, i64 4 476 %elt5 = extractelement <16 x i16> %vec16, i64 5 477 %elt6 = extractelement <16 x i16> %vec16, i64 6 478 %elt7 = extractelement <16 x i16> %vec16, i64 7 479 480 %elt8 = extractelement <16 x i16> %vec16, i64 8 481 %elt9 = extractelement <16 x i16> %vec16, i64 9 482 %elt10 
= extractelement <16 x i16> %vec16, i64 10 483 %elt11 = extractelement <16 x i16> %vec16, i64 11 484 %elt12 = extractelement <16 x i16> %vec16, i64 12 485 %elt13 = extractelement <16 x i16> %vec16, i64 13 486 %elt14 = extractelement <16 x i16> %vec16, i64 14 487 %elt15 = extractelement <16 x i16> %vec16, i64 15 488 489 %cmp0 = icmp slt i16 %elt1, %elt0 490 %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0 491 %cmp1 = icmp slt i16 %elt2, %min1 492 %min2 = select i1 %cmp1, i16 %elt2, i16 %min1 493 %cmp2 = icmp slt i16 %elt3, %min2 494 %min3 = select i1 %cmp2, i16 %elt3, i16 %min2 495 496 %cmp3 = icmp slt i16 %elt4, %min3 497 %min4 = select i1 %cmp3, i16 %elt4, i16 %min3 498 %cmp4 = icmp slt i16 %elt5, %min4 499 %min5 = select i1 %cmp4, i16 %elt5, i16 %min4 500 501 %cmp5 = icmp slt i16 %elt6, %min5 502 %min6 = select i1 %cmp5, i16 %elt6, i16 %min5 503 %cmp6 = icmp slt i16 %elt7, %min6 504 %min7 = select i1 %cmp6, i16 %elt7, i16 %min6 505 506 %cmp7 = icmp slt i16 %elt8, %min7 507 %min8 = select i1 %cmp7, i16 %elt8, i16 %min7 508 %cmp8 = icmp slt i16 %elt9, %min8 509 %min9 = select i1 %cmp8, i16 %elt9, i16 %min8 510 511 %cmp9 = icmp slt i16 %elt10, %min9 512 %min10 = select i1 %cmp9, i16 %elt10, i16 %min9 513 %cmp10 = icmp slt i16 %elt11, %min10 514 %min11 = select i1 %cmp10, i16 %elt11, i16 %min10 515 516 %cmp11 = icmp slt i16 %elt12, %min11 517 %min12 = select i1 %cmp11, i16 %elt12, i16 %min11 518 %cmp12 = icmp slt i16 %elt13, %min12 519 %min13 = select i1 %cmp12, i16 %elt13, i16 %min12 520 521 %cmp13 = icmp slt i16 %elt14, %min13 522 %min14 = select i1 %cmp13, i16 %elt14, i16 %min13 523 %cmp14 = icmp slt i16 %elt15, %min14 524 %min15 = select i1 %cmp14, i16 %elt15, i16 %min14 525 526 527 ret i16 %min15 528} 529 530define i16 @reduction_umax_v4i16(<4 x i16> %vec4) { 531; GFX9-LABEL: @reduction_umax_v4i16( 532; GFX9-NEXT: entry: 533; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 534; 
GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i16> [[VEC4]], [[RDX_SHUF]] 535; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]] 536; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 537; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ugt <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] 538; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]] 539; GFX9-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0 540; GFX9-NEXT: ret i16 [[TMP0]] 541; 542; VI-LABEL: @reduction_umax_v4i16( 543; VI-NEXT: entry: 544; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0 545; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1 546; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2 547; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3 548; VI-NEXT: [[CMP1:%.*]] = icmp ugt i16 [[ELT1]], [[ELT0]] 549; VI-NEXT: [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]] 550; VI-NEXT: [[CMP2:%.*]] = icmp ugt i16 [[ELT2]], [[MAX1]] 551; VI-NEXT: [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]] 552; VI-NEXT: [[CMP3:%.*]] = icmp ugt i16 [[ELT3]], [[MAX2]] 553; VI-NEXT: [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]] 554; VI-NEXT: ret i16 [[MAX3]] 555; 556entry: 557 %elt0 = extractelement <4 x i16> %vec4, i64 0 558 %elt1 = extractelement <4 x i16> %vec4, i64 1 559 %elt2 = extractelement <4 x i16> %vec4, i64 2 560 %elt3 = extractelement <4 x i16> %vec4, i64 3 561 562 %cmp1 = icmp ugt i16 %elt1, %elt0 563 %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0 564 %cmp2 = icmp ugt i16 %elt2, %max1 565 %max2 = select i1 %cmp2, i16 %elt2, i16 %max1 566 %cmp3 = icmp ugt i16 %elt3, %max2 567 %max3 = select i1 %cmp3, i16 %elt3, i16 %max2 568 569 ret i16 
%max3 570} 571 572define i16 @reduction_smax_v4i16(<4 x i16> %vec4) { 573; GFX9-LABEL: @reduction_smax_v4i16( 574; GFX9-NEXT: entry: 575; GFX9-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 576; GFX9-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i16> [[VEC4]], [[RDX_SHUF]] 577; GFX9-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]] 578; GFX9-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 579; GFX9-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] 580; GFX9-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]] 581; GFX9-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0 582; GFX9-NEXT: ret i16 [[TMP0]] 583; 584; VI-LABEL: @reduction_smax_v4i16( 585; VI-NEXT: entry: 586; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0 587; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1 588; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2 589; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3 590; VI-NEXT: [[CMP1:%.*]] = icmp sgt i16 [[ELT1]], [[ELT0]] 591; VI-NEXT: [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]] 592; VI-NEXT: [[CMP2:%.*]] = icmp sgt i16 [[ELT2]], [[MAX1]] 593; VI-NEXT: [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]] 594; VI-NEXT: [[CMP3:%.*]] = icmp sgt i16 [[ELT3]], [[MAX2]] 595; VI-NEXT: [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]] 596; VI-NEXT: ret i16 [[MAX3]] 597; 598entry: 599 %elt0 = extractelement <4 x i16> %vec4, i64 0 600 %elt1 = extractelement <4 x i16> %vec4, i64 1 601 %elt2 = extractelement <4 x i16> %vec4, i64 2 602 %elt3 = extractelement <4 x i16> %vec4, i64 3 603 604 
; Signed-max chain: each icmp sgt + select keeps the larger of the next lane
; and the running maximum.
  %cmp1 = icmp sgt i16 %elt1, %elt0
  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp sgt i16 %elt2, %max1
  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
  %cmp3 = icmp sgt i16 %elt3, %max2
  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2

  ret i16 %max3
}

; Floating-point maximum over the four lanes of a <4 x half>, written as a
; chain of "fcmp fast ogt" + select. The checks use the shared GCN prefix, so
; both RUN lines (gfx900 and fiji) expect the scalar chain to be left as-is.
;
; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
define half @reduction_fmax_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmax_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]]
; GCN-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]]
; GCN-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]]
; GCN-NEXT:    ret half [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast ogt half %elt1, %elt0
  %max1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast ogt half %elt2, %max1
  %max2 = select i1 %cmp2, half %elt2, half %max1
  %cmp3 = fcmp fast ogt half %elt3, %max2
  %max3 = select i1 %cmp3, half %elt3, half %max2

  ret half %max3
}

; FIXME: Use fminnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
; Floating-point minimum over the four lanes of a <4 x half>, written as a
; chain of "fcmp fast olt" + select. The checks use the shared GCN prefix, so
; both RUN lines (gfx900 and fiji) expect the scalar chain to be left as-is.
define half @reduction_fmin_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmin_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]]
; GCN-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]]
; GCN-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]]
; GCN-NEXT:    ret half [[MIN3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast olt half %elt1, %elt0
  %min1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast olt half %elt2, %min1
  %min2 = select i1 %cmp2, half %elt2, half %min1
  %cmp3 = fcmp fast olt half %elt3, %min2
  %min3 = select i1 %cmp3, half %elt3, half %min2

  ret half %min3
}

; Tests to make sure reduction does not kick in. Vega does not support packed
; math for types larger than 16 bits.
; Sum of the four lanes of a <4 x float>, written as a chain of scalar
; "fadd fast". The checks use the shared GCN prefix, so both RUN lines
; (gfx900 and fiji) expect the extractelement/fadd sequence to stay scalar.
define float @reduction_v4float(<4 x float> %a) {
; GCN-LABEL: @reduction_v4float(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[A]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x float> [[A]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x float> [[A]], i64 3
; GCN-NEXT:    [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[ADD2:%.*]] = fadd fast float [[ELT2]], [[ADD1]]
; GCN-NEXT:    [[ADD3:%.*]] = fadd fast float [[ELT3]], [[ADD2]]
; GCN-NEXT:    ret float [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x float> %a, i64 0
  %elt1 = extractelement <4 x float> %a, i64 1
  %elt2 = extractelement <4 x float> %a, i64 2
  %elt3 = extractelement <4 x float> %a, i64 3

  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2

  ret float %add3
}