1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s 3; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,VI %s 4 5define half @reduction_half4(<4 x half> %a) { 6; GFX9-LABEL: @reduction_half4( 7; GFX9-NEXT: entry: 8; GFX9-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[A:%.*]]) 9; GFX9-NEXT: ret half [[TMP0]] 10; 11; VI-LABEL: @reduction_half4( 12; VI-NEXT: entry: 13; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 14; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1 15; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2 16; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3 17; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] 18; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] 19; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] 20; VI-NEXT: ret half [[ADD3]] 21; 22entry: 23 %elt0 = extractelement <4 x half> %a, i64 0 24 %elt1 = extractelement <4 x half> %a, i64 1 25 %elt2 = extractelement <4 x half> %a, i64 2 26 %elt3 = extractelement <4 x half> %a, i64 3 27 28 %add1 = fadd fast half %elt1, %elt0 29 %add2 = fadd fast half %elt2, %add1 30 %add3 = fadd fast half %elt3, %add2 31 32 ret half %add3 33} 34 35define half @reduction_half8(<8 x half> %vec8) { 36; GFX9-LABEL: @reduction_half8( 37; GFX9-NEXT: entry: 38; GFX9-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[VEC8:%.*]]) 39; GFX9-NEXT: ret half [[TMP0]] 40; 41; VI-LABEL: @reduction_half8( 42; VI-NEXT: entry: 43; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8:%.*]], i64 0 44; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1 45; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2 46; VI-NEXT: 
[[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3 47; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4 48; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5 49; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6 50; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7 51; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] 52; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] 53; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] 54; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]] 55; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]] 56; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]] 57; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]] 58; VI-NEXT: ret half [[ADD7]] 59; 60entry: 61 %elt0 = extractelement <8 x half> %vec8, i64 0 62 %elt1 = extractelement <8 x half> %vec8, i64 1 63 %elt2 = extractelement <8 x half> %vec8, i64 2 64 %elt3 = extractelement <8 x half> %vec8, i64 3 65 %elt4 = extractelement <8 x half> %vec8, i64 4 66 %elt5 = extractelement <8 x half> %vec8, i64 5 67 %elt6 = extractelement <8 x half> %vec8, i64 6 68 %elt7 = extractelement <8 x half> %vec8, i64 7 69 70 %add1 = fadd fast half %elt1, %elt0 71 %add2 = fadd fast half %elt2, %add1 72 %add3 = fadd fast half %elt3, %add2 73 %add4 = fadd fast half %elt4, %add3 74 %add5 = fadd fast half %elt5, %add4 75 %add6 = fadd fast half %elt6, %add5 76 %add7 = fadd fast half %elt7, %add6 77 78 ret half %add7 79} 80 81define half @reduction_half16(<16 x half> %vec16) { 82; GFX9-LABEL: @reduction_half16( 83; GFX9-NEXT: entry: 84; GFX9-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[VEC16:%.*]]) 85; GFX9-NEXT: ret half [[TMP0]] 86; 87; VI-LABEL: @reduction_half16( 88; VI-NEXT: entry: 89; VI-NEXT: [[ELT0:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 0 90; VI-NEXT: [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1 91; VI-NEXT: 
[[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2 92; VI-NEXT: [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3 93; VI-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4 94; VI-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5 95; VI-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6 96; VI-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7 97; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8 98; VI-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9 99; VI-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10 100; VI-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11 101; VI-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12 102; VI-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13 103; VI-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14 104; VI-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15 105; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] 106; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] 107; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] 108; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]] 109; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]] 110; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]] 111; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]] 112; VI-NEXT: [[ADD8:%.*]] = fadd fast half [[ELT8]], [[ADD7]] 113; VI-NEXT: [[ADD9:%.*]] = fadd fast half [[ELT9]], [[ADD8]] 114; VI-NEXT: [[ADD10:%.*]] = fadd fast half [[ELT10]], [[ADD9]] 115; VI-NEXT: [[ADD11:%.*]] = fadd fast half [[ELT11]], [[ADD10]] 116; VI-NEXT: [[ADD12:%.*]] = fadd fast half [[ELT12]], [[ADD11]] 117; VI-NEXT: [[ADD13:%.*]] = fadd fast half [[ELT13]], [[ADD12]] 118; VI-NEXT: [[ADD14:%.*]] = fadd fast half [[ELT14]], [[ADD13]] 119; VI-NEXT: [[ADD15:%.*]] = fadd fast half [[ELT15]], [[ADD14]] 120; VI-NEXT: ret half 
[[ADD15]] 121; 122entry: 123 %elt0 = extractelement <16 x half> %vec16, i64 0 124 %elt1 = extractelement <16 x half> %vec16, i64 1 125 %elt2 = extractelement <16 x half> %vec16, i64 2 126 %elt3 = extractelement <16 x half> %vec16, i64 3 127 %elt4 = extractelement <16 x half> %vec16, i64 4 128 %elt5 = extractelement <16 x half> %vec16, i64 5 129 %elt6 = extractelement <16 x half> %vec16, i64 6 130 %elt7 = extractelement <16 x half> %vec16, i64 7 131 %elt8 = extractelement <16 x half> %vec16, i64 8 132 %elt9 = extractelement <16 x half> %vec16, i64 9 133 %elt10 = extractelement <16 x half> %vec16, i64 10 134 %elt11 = extractelement <16 x half> %vec16, i64 11 135 %elt12 = extractelement <16 x half> %vec16, i64 12 136 %elt13 = extractelement <16 x half> %vec16, i64 13 137 %elt14 = extractelement <16 x half> %vec16, i64 14 138 %elt15 = extractelement <16 x half> %vec16, i64 15 139 140 %add1 = fadd fast half %elt1, %elt0 141 %add2 = fadd fast half %elt2, %add1 142 %add3 = fadd fast half %elt3, %add2 143 %add4 = fadd fast half %elt4, %add3 144 %add5 = fadd fast half %elt5, %add4 145 %add6 = fadd fast half %elt6, %add5 146 %add7 = fadd fast half %elt7, %add6 147 %add8 = fadd fast half %elt8, %add7 148 %add9 = fadd fast half %elt9, %add8 149 %add10 = fadd fast half %elt10, %add9 150 %add11 = fadd fast half %elt11, %add10 151 %add12 = fadd fast half %elt12, %add11 152 %add13 = fadd fast half %elt13, %add12 153 %add14 = fadd fast half %elt14, %add13 154 %add15 = fadd fast half %elt15, %add14 155 156 ret half %add15 157} 158 159; FIXME: support vectorization; 160define half @reduction_sub_half4(<4 x half> %a) { 161; GCN-LABEL: @reduction_sub_half4( 162; GCN-NEXT: entry: 163; GCN-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 164; GCN-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1 165; GCN-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2 166; GCN-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3 167; GCN-NEXT: [[ADD1:%.*]] = 
fsub fast half [[ELT1]], [[ELT0]] 168; GCN-NEXT: [[ADD2:%.*]] = fsub fast half [[ELT2]], [[ADD1]] 169; GCN-NEXT: [[ADD3:%.*]] = fsub fast half [[ELT3]], [[ADD2]] 170; GCN-NEXT: ret half [[ADD3]] 171; 172entry: 173 %elt0 = extractelement <4 x half> %a, i64 0 174 %elt1 = extractelement <4 x half> %a, i64 1 175 %elt2 = extractelement <4 x half> %a, i64 2 176 %elt3 = extractelement <4 x half> %a, i64 3 177 178 %add1 = fsub fast half %elt1, %elt0 179 %add2 = fsub fast half %elt2, %add1 180 %add3 = fsub fast half %elt3, %add2 181 182 ret half %add3 183} 184 185define i16 @reduction_v4i16(<4 x i16> %a) { 186; GFX9-LABEL: @reduction_v4i16( 187; GFX9-NEXT: entry: 188; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]]) 189; GFX9-NEXT: ret i16 [[TMP0]] 190; 191; VI-LABEL: @reduction_v4i16( 192; VI-NEXT: entry: 193; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 194; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[A]], i64 1 195; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[A]], i64 2 196; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[A]], i64 3 197; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]] 198; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]] 199; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]] 200; VI-NEXT: ret i16 [[ADD3]] 201; 202entry: 203 %elt0 = extractelement <4 x i16> %a, i64 0 204 %elt1 = extractelement <4 x i16> %a, i64 1 205 %elt2 = extractelement <4 x i16> %a, i64 2 206 %elt3 = extractelement <4 x i16> %a, i64 3 207 208 %add1 = add i16 %elt1, %elt0 209 %add2 = add i16 %elt2, %add1 210 %add3 = add i16 %elt3, %add2 211 212 ret i16 %add3 213} 214 215define i16 @reduction_v8i16(<8 x i16> %vec8) { 216; GFX9-LABEL: @reduction_v8i16( 217; GFX9-NEXT: entry: 218; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]]) 219; GFX9-NEXT: ret i16 [[TMP0]] 220; 221; VI-LABEL: @reduction_v8i16( 222; VI-NEXT: entry: 223; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x 
i16> [[VEC8:%.*]], i64 0 224; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1 225; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2 226; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3 227; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4 228; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5 229; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6 230; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7 231; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]] 232; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]] 233; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]] 234; VI-NEXT: [[ADD4:%.*]] = add i16 [[ELT4]], [[ADD3]] 235; VI-NEXT: [[ADD5:%.*]] = add i16 [[ELT5]], [[ADD4]] 236; VI-NEXT: [[ADD6:%.*]] = add i16 [[ELT6]], [[ADD5]] 237; VI-NEXT: [[ADD7:%.*]] = add i16 [[ELT7]], [[ADD6]] 238; VI-NEXT: ret i16 [[ADD7]] 239; 240entry: 241 %elt0 = extractelement <8 x i16> %vec8, i64 0 242 %elt1 = extractelement <8 x i16> %vec8, i64 1 243 %elt2 = extractelement <8 x i16> %vec8, i64 2 244 %elt3 = extractelement <8 x i16> %vec8, i64 3 245 %elt4 = extractelement <8 x i16> %vec8, i64 4 246 %elt5 = extractelement <8 x i16> %vec8, i64 5 247 %elt6 = extractelement <8 x i16> %vec8, i64 6 248 %elt7 = extractelement <8 x i16> %vec8, i64 7 249 250 %add1 = add i16 %elt1, %elt0 251 %add2 = add i16 %elt2, %add1 252 %add3 = add i16 %elt3, %add2 253 %add4 = add i16 %elt4, %add3 254 %add5 = add i16 %elt5, %add4 255 %add6 = add i16 %elt6, %add5 256 %add7 = add i16 %elt7, %add6 257 258 ret i16 %add7 259} 260 261define i16 @reduction_umin_v4i16(<4 x i16> %vec4) { 262; GFX9-LABEL: @reduction_umin_v4i16( 263; GFX9-NEXT: entry: 264; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[VEC4:%.*]]) 265; GFX9-NEXT: ret i16 [[TMP0]] 266; 267; VI-LABEL: @reduction_umin_v4i16( 268; VI-NEXT: entry: 269; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], 
i64 0 270; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1 271; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2 272; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3 273; VI-NEXT: [[CMP1:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]] 274; VI-NEXT: [[MIN1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]] 275; VI-NEXT: [[CMP2:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]] 276; VI-NEXT: [[MIN2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MIN1]] 277; VI-NEXT: [[CMP3:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]] 278; VI-NEXT: [[MIN3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MIN2]] 279; VI-NEXT: ret i16 [[MIN3]] 280; 281entry: 282 %elt0 = extractelement <4 x i16> %vec4, i64 0 283 %elt1 = extractelement <4 x i16> %vec4, i64 1 284 %elt2 = extractelement <4 x i16> %vec4, i64 2 285 %elt3 = extractelement <4 x i16> %vec4, i64 3 286 287 %cmp1 = icmp ult i16 %elt1, %elt0 288 %min1 = select i1 %cmp1, i16 %elt1, i16 %elt0 289 %cmp2 = icmp ult i16 %elt2, %min1 290 %min2 = select i1 %cmp2, i16 %elt2, i16 %min1 291 %cmp3 = icmp ult i16 %elt3, %min2 292 %min3 = select i1 %cmp3, i16 %elt3, i16 %min2 293 294 ret i16 %min3 295} 296 297define i16 @reduction_icmp_v8i16(<8 x i16> %vec8) { 298; GFX9-LABEL: @reduction_icmp_v8i16( 299; GFX9-NEXT: entry: 300; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[VEC8:%.*]]) 301; GFX9-NEXT: ret i16 [[TMP0]] 302; 303; VI-LABEL: @reduction_icmp_v8i16( 304; VI-NEXT: entry: 305; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0 306; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1 307; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2 308; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3 309; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4 310; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5 311; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6 312; VI-NEXT: 
[[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7 313; VI-NEXT: [[CMP0:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]] 314; VI-NEXT: [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]] 315; VI-NEXT: [[CMP1:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]] 316; VI-NEXT: [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]] 317; VI-NEXT: [[CMP2:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]] 318; VI-NEXT: [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]] 319; VI-NEXT: [[CMP3:%.*]] = icmp ult i16 [[ELT4]], [[MIN3]] 320; VI-NEXT: [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]] 321; VI-NEXT: [[CMP4:%.*]] = icmp ult i16 [[ELT5]], [[MIN4]] 322; VI-NEXT: [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]] 323; VI-NEXT: [[CMP5:%.*]] = icmp ult i16 [[ELT6]], [[MIN5]] 324; VI-NEXT: [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]] 325; VI-NEXT: [[CMP6:%.*]] = icmp ult i16 [[ELT7]], [[MIN6]] 326; VI-NEXT: [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]] 327; VI-NEXT: ret i16 [[MIN7]] 328; 329entry: 330 %elt0 = extractelement <8 x i16> %vec8, i64 0 331 %elt1 = extractelement <8 x i16> %vec8, i64 1 332 %elt2 = extractelement <8 x i16> %vec8, i64 2 333 %elt3 = extractelement <8 x i16> %vec8, i64 3 334 %elt4 = extractelement <8 x i16> %vec8, i64 4 335 %elt5 = extractelement <8 x i16> %vec8, i64 5 336 %elt6 = extractelement <8 x i16> %vec8, i64 6 337 %elt7 = extractelement <8 x i16> %vec8, i64 7 338 339 %cmp0 = icmp ult i16 %elt1, %elt0 340 %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0 341 %cmp1 = icmp ult i16 %elt2, %min1 342 %min2 = select i1 %cmp1, i16 %elt2, i16 %min1 343 %cmp2 = icmp ult i16 %elt3, %min2 344 %min3 = select i1 %cmp2, i16 %elt3, i16 %min2 345 346 %cmp3 = icmp ult i16 %elt4, %min3 347 %min4 = select i1 %cmp3, i16 %elt4, i16 %min3 348 %cmp4 = icmp ult i16 %elt5, %min4 349 %min5 = select i1 %cmp4, i16 %elt5, i16 %min4 350 351 %cmp5 = icmp ult i16 %elt6, %min5 352 %min6 = select i1 %cmp5, i16 %elt6, i16 
%min5 353 %cmp6 = icmp ult i16 %elt7, %min6 354 %min7 = select i1 %cmp6, i16 %elt7, i16 %min6 355 356 ret i16 %min7 357} 358 359define i16 @reduction_smin_v16i16(<16 x i16> %vec16) { 360; GFX9-LABEL: @reduction_smin_v16i16( 361; GFX9-NEXT: entry: 362; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> [[VEC16:%.*]]) 363; GFX9-NEXT: ret i16 [[TMP0]] 364; 365; VI-LABEL: @reduction_smin_v16i16( 366; VI-NEXT: entry: 367; VI-NEXT: [[ELT0:%.*]] = extractelement <16 x i16> [[VEC16:%.*]], i64 0 368; VI-NEXT: [[ELT1:%.*]] = extractelement <16 x i16> [[VEC16]], i64 1 369; VI-NEXT: [[ELT2:%.*]] = extractelement <16 x i16> [[VEC16]], i64 2 370; VI-NEXT: [[ELT3:%.*]] = extractelement <16 x i16> [[VEC16]], i64 3 371; VI-NEXT: [[ELT4:%.*]] = extractelement <16 x i16> [[VEC16]], i64 4 372; VI-NEXT: [[ELT5:%.*]] = extractelement <16 x i16> [[VEC16]], i64 5 373; VI-NEXT: [[ELT6:%.*]] = extractelement <16 x i16> [[VEC16]], i64 6 374; VI-NEXT: [[ELT7:%.*]] = extractelement <16 x i16> [[VEC16]], i64 7 375; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x i16> [[VEC16]], i64 8 376; VI-NEXT: [[ELT9:%.*]] = extractelement <16 x i16> [[VEC16]], i64 9 377; VI-NEXT: [[ELT10:%.*]] = extractelement <16 x i16> [[VEC16]], i64 10 378; VI-NEXT: [[ELT11:%.*]] = extractelement <16 x i16> [[VEC16]], i64 11 379; VI-NEXT: [[ELT12:%.*]] = extractelement <16 x i16> [[VEC16]], i64 12 380; VI-NEXT: [[ELT13:%.*]] = extractelement <16 x i16> [[VEC16]], i64 13 381; VI-NEXT: [[ELT14:%.*]] = extractelement <16 x i16> [[VEC16]], i64 14 382; VI-NEXT: [[ELT15:%.*]] = extractelement <16 x i16> [[VEC16]], i64 15 383; VI-NEXT: [[CMP0:%.*]] = icmp slt i16 [[ELT1]], [[ELT0]] 384; VI-NEXT: [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]] 385; VI-NEXT: [[CMP1:%.*]] = icmp slt i16 [[ELT2]], [[MIN1]] 386; VI-NEXT: [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]] 387; VI-NEXT: [[CMP2:%.*]] = icmp slt i16 [[ELT3]], [[MIN2]] 388; VI-NEXT: [[MIN3:%.*]] = select i1 [[CMP2]], i16 
[[ELT3]], i16 [[MIN2]] 389; VI-NEXT: [[CMP3:%.*]] = icmp slt i16 [[ELT4]], [[MIN3]] 390; VI-NEXT: [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]] 391; VI-NEXT: [[CMP4:%.*]] = icmp slt i16 [[ELT5]], [[MIN4]] 392; VI-NEXT: [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]] 393; VI-NEXT: [[CMP5:%.*]] = icmp slt i16 [[ELT6]], [[MIN5]] 394; VI-NEXT: [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]] 395; VI-NEXT: [[CMP6:%.*]] = icmp slt i16 [[ELT7]], [[MIN6]] 396; VI-NEXT: [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]] 397; VI-NEXT: [[CMP7:%.*]] = icmp slt i16 [[ELT8]], [[MIN7]] 398; VI-NEXT: [[MIN8:%.*]] = select i1 [[CMP7]], i16 [[ELT8]], i16 [[MIN7]] 399; VI-NEXT: [[CMP8:%.*]] = icmp slt i16 [[ELT9]], [[MIN8]] 400; VI-NEXT: [[MIN9:%.*]] = select i1 [[CMP8]], i16 [[ELT9]], i16 [[MIN8]] 401; VI-NEXT: [[CMP9:%.*]] = icmp slt i16 [[ELT10]], [[MIN9]] 402; VI-NEXT: [[MIN10:%.*]] = select i1 [[CMP9]], i16 [[ELT10]], i16 [[MIN9]] 403; VI-NEXT: [[CMP10:%.*]] = icmp slt i16 [[ELT11]], [[MIN10]] 404; VI-NEXT: [[MIN11:%.*]] = select i1 [[CMP10]], i16 [[ELT11]], i16 [[MIN10]] 405; VI-NEXT: [[CMP11:%.*]] = icmp slt i16 [[ELT12]], [[MIN11]] 406; VI-NEXT: [[MIN12:%.*]] = select i1 [[CMP11]], i16 [[ELT12]], i16 [[MIN11]] 407; VI-NEXT: [[CMP12:%.*]] = icmp slt i16 [[ELT13]], [[MIN12]] 408; VI-NEXT: [[MIN13:%.*]] = select i1 [[CMP12]], i16 [[ELT13]], i16 [[MIN12]] 409; VI-NEXT: [[CMP13:%.*]] = icmp slt i16 [[ELT14]], [[MIN13]] 410; VI-NEXT: [[MIN14:%.*]] = select i1 [[CMP13]], i16 [[ELT14]], i16 [[MIN13]] 411; VI-NEXT: [[CMP14:%.*]] = icmp slt i16 [[ELT15]], [[MIN14]] 412; VI-NEXT: [[MIN15:%.*]] = select i1 [[CMP14]], i16 [[ELT15]], i16 [[MIN14]] 413; VI-NEXT: ret i16 [[MIN15]] 414; 415entry: 416 %elt0 = extractelement <16 x i16> %vec16, i64 0 417 %elt1 = extractelement <16 x i16> %vec16, i64 1 418 %elt2 = extractelement <16 x i16> %vec16, i64 2 419 %elt3 = extractelement <16 x i16> %vec16, i64 3 420 %elt4 = extractelement <16 x i16> 
%vec16, i64 4 421 %elt5 = extractelement <16 x i16> %vec16, i64 5 422 %elt6 = extractelement <16 x i16> %vec16, i64 6 423 %elt7 = extractelement <16 x i16> %vec16, i64 7 424 425 %elt8 = extractelement <16 x i16> %vec16, i64 8 426 %elt9 = extractelement <16 x i16> %vec16, i64 9 427 %elt10 = extractelement <16 x i16> %vec16, i64 10 428 %elt11 = extractelement <16 x i16> %vec16, i64 11 429 %elt12 = extractelement <16 x i16> %vec16, i64 12 430 %elt13 = extractelement <16 x i16> %vec16, i64 13 431 %elt14 = extractelement <16 x i16> %vec16, i64 14 432 %elt15 = extractelement <16 x i16> %vec16, i64 15 433 434 %cmp0 = icmp slt i16 %elt1, %elt0 435 %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0 436 %cmp1 = icmp slt i16 %elt2, %min1 437 %min2 = select i1 %cmp1, i16 %elt2, i16 %min1 438 %cmp2 = icmp slt i16 %elt3, %min2 439 %min3 = select i1 %cmp2, i16 %elt3, i16 %min2 440 441 %cmp3 = icmp slt i16 %elt4, %min3 442 %min4 = select i1 %cmp3, i16 %elt4, i16 %min3 443 %cmp4 = icmp slt i16 %elt5, %min4 444 %min5 = select i1 %cmp4, i16 %elt5, i16 %min4 445 446 %cmp5 = icmp slt i16 %elt6, %min5 447 %min6 = select i1 %cmp5, i16 %elt6, i16 %min5 448 %cmp6 = icmp slt i16 %elt7, %min6 449 %min7 = select i1 %cmp6, i16 %elt7, i16 %min6 450 451 %cmp7 = icmp slt i16 %elt8, %min7 452 %min8 = select i1 %cmp7, i16 %elt8, i16 %min7 453 %cmp8 = icmp slt i16 %elt9, %min8 454 %min9 = select i1 %cmp8, i16 %elt9, i16 %min8 455 456 %cmp9 = icmp slt i16 %elt10, %min9 457 %min10 = select i1 %cmp9, i16 %elt10, i16 %min9 458 %cmp10 = icmp slt i16 %elt11, %min10 459 %min11 = select i1 %cmp10, i16 %elt11, i16 %min10 460 461 %cmp11 = icmp slt i16 %elt12, %min11 462 %min12 = select i1 %cmp11, i16 %elt12, i16 %min11 463 %cmp12 = icmp slt i16 %elt13, %min12 464 %min13 = select i1 %cmp12, i16 %elt13, i16 %min12 465 466 %cmp13 = icmp slt i16 %elt14, %min13 467 %min14 = select i1 %cmp13, i16 %elt14, i16 %min13 468 %cmp14 = icmp slt i16 %elt15, %min14 469 %min15 = select i1 %cmp14, i16 %elt15, i16 %min14 470 471 472 
ret i16 %min15 473} 474 475define i16 @reduction_umax_v4i16(<4 x i16> %vec4) { 476; GFX9-LABEL: @reduction_umax_v4i16( 477; GFX9-NEXT: entry: 478; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[VEC4:%.*]]) 479; GFX9-NEXT: ret i16 [[TMP0]] 480; 481; VI-LABEL: @reduction_umax_v4i16( 482; VI-NEXT: entry: 483; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0 484; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1 485; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2 486; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3 487; VI-NEXT: [[CMP1:%.*]] = icmp ugt i16 [[ELT1]], [[ELT0]] 488; VI-NEXT: [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]] 489; VI-NEXT: [[CMP2:%.*]] = icmp ugt i16 [[ELT2]], [[MAX1]] 490; VI-NEXT: [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]] 491; VI-NEXT: [[CMP3:%.*]] = icmp ugt i16 [[ELT3]], [[MAX2]] 492; VI-NEXT: [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]] 493; VI-NEXT: ret i16 [[MAX3]] 494; 495entry: 496 %elt0 = extractelement <4 x i16> %vec4, i64 0 497 %elt1 = extractelement <4 x i16> %vec4, i64 1 498 %elt2 = extractelement <4 x i16> %vec4, i64 2 499 %elt3 = extractelement <4 x i16> %vec4, i64 3 500 501 %cmp1 = icmp ugt i16 %elt1, %elt0 502 %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0 503 %cmp2 = icmp ugt i16 %elt2, %max1 504 %max2 = select i1 %cmp2, i16 %elt2, i16 %max1 505 %cmp3 = icmp ugt i16 %elt3, %max2 506 %max3 = select i1 %cmp3, i16 %elt3, i16 %max2 507 508 ret i16 %max3 509} 510 511define i16 @reduction_smax_v4i16(<4 x i16> %vec4) { 512; GFX9-LABEL: @reduction_smax_v4i16( 513; GFX9-NEXT: entry: 514; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[VEC4:%.*]]) 515; GFX9-NEXT: ret i16 [[TMP0]] 516; 517; VI-LABEL: @reduction_smax_v4i16( 518; VI-NEXT: entry: 519; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0 520; VI-NEXT: [[ELT1:%.*]] = 
extractelement <4 x i16> [[VEC4]], i64 1 521; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2 522; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3 523; VI-NEXT: [[CMP1:%.*]] = icmp sgt i16 [[ELT1]], [[ELT0]] 524; VI-NEXT: [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]] 525; VI-NEXT: [[CMP2:%.*]] = icmp sgt i16 [[ELT2]], [[MAX1]] 526; VI-NEXT: [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]] 527; VI-NEXT: [[CMP3:%.*]] = icmp sgt i16 [[ELT3]], [[MAX2]] 528; VI-NEXT: [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]] 529; VI-NEXT: ret i16 [[MAX3]] 530; 531entry: 532 %elt0 = extractelement <4 x i16> %vec4, i64 0 533 %elt1 = extractelement <4 x i16> %vec4, i64 1 534 %elt2 = extractelement <4 x i16> %vec4, i64 2 535 %elt3 = extractelement <4 x i16> %vec4, i64 3 536 537 %cmp1 = icmp sgt i16 %elt1, %elt0 538 %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0 539 %cmp2 = icmp sgt i16 %elt2, %max1 540 %max2 = select i1 %cmp2, i16 %elt2, i16 %max1 541 %cmp3 = icmp sgt i16 %elt3, %max2 542 %max3 = select i1 %cmp3, i16 %elt3, i16 %max2 543 544 ret i16 %max3 545} 546 547; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select 548; with fastmath on the select. 
549define half @reduction_fmax_v4half(<4 x half> %vec4) { 550; GCN-LABEL: @reduction_fmax_v4half( 551; GCN-NEXT: entry: 552; GCN-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0 553; GCN-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1 554; GCN-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2 555; GCN-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3 556; GCN-NEXT: [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]] 557; GCN-NEXT: [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]] 558; GCN-NEXT: [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]] 559; GCN-NEXT: [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]] 560; GCN-NEXT: [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]] 561; GCN-NEXT: [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]] 562; GCN-NEXT: ret half [[MAX3]] 563; 564entry: 565 %elt0 = extractelement <4 x half> %vec4, i64 0 566 %elt1 = extractelement <4 x half> %vec4, i64 1 567 %elt2 = extractelement <4 x half> %vec4, i64 2 568 %elt3 = extractelement <4 x half> %vec4, i64 3 569 570 %cmp1 = fcmp fast ogt half %elt1, %elt0 571 %max1 = select i1 %cmp1, half %elt1, half %elt0 572 %cmp2 = fcmp fast ogt half %elt2, %max1 573 %max2 = select i1 %cmp2, half %elt2, half %max1 574 %cmp3 = fcmp fast ogt half %elt3, %max2 575 %max3 = select i1 %cmp3, half %elt3, half %max2 576 577 ret half %max3 578} 579 580; FIXME: Use fminnum intrinsics to match what InstCombine creates for fcmp+select 581; with fastmath on the select. 
582define half @reduction_fmin_v4half(<4 x half> %vec4) { 583; GCN-LABEL: @reduction_fmin_v4half( 584; GCN-NEXT: entry: 585; GCN-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0 586; GCN-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1 587; GCN-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2 588; GCN-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3 589; GCN-NEXT: [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]] 590; GCN-NEXT: [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]] 591; GCN-NEXT: [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]] 592; GCN-NEXT: [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]] 593; GCN-NEXT: [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]] 594; GCN-NEXT: [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]] 595; GCN-NEXT: ret half [[MIN3]] 596; 597entry: 598 %elt0 = extractelement <4 x half> %vec4, i64 0 599 %elt1 = extractelement <4 x half> %vec4, i64 1 600 %elt2 = extractelement <4 x half> %vec4, i64 2 601 %elt3 = extractelement <4 x half> %vec4, i64 3 602 603 %cmp1 = fcmp fast olt half %elt1, %elt0 604 %min1 = select i1 %cmp1, half %elt1, half %elt0 605 %cmp2 = fcmp fast olt half %elt2, %min1 606 %min2 = select i1 %cmp2, half %elt2, half %min1 607 %cmp3 = fcmp fast olt half %elt3, %min2 608 %min3 = select i1 %cmp3, half %elt3, half %min2 609 610 ret half %min3 611} 612 613; Tests to make sure reduction does not kick in. vega does not support packed math for types larger than 16 bits. 
614define float @reduction_v4float(<4 x float> %a) { 615; GCN-LABEL: @reduction_v4float( 616; GCN-NEXT: entry: 617; GCN-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 618; GCN-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[A]], i64 1 619; GCN-NEXT: [[ELT2:%.*]] = extractelement <4 x float> [[A]], i64 2 620; GCN-NEXT: [[ELT3:%.*]] = extractelement <4 x float> [[A]], i64 3 621; GCN-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]] 622; GCN-NEXT: [[ADD2:%.*]] = fadd fast float [[ELT2]], [[ADD1]] 623; GCN-NEXT: [[ADD3:%.*]] = fadd fast float [[ELT3]], [[ADD2]] 624; GCN-NEXT: ret float [[ADD3]] 625; 626entry: 627 %elt0 = extractelement <4 x float> %a, i64 0 628 %elt1 = extractelement <4 x float> %a, i64 1 629 %elt2 = extractelement <4 x float> %a, i64 2 630 %elt3 = extractelement <4 x float> %a, i64 3 631 632 %add1 = fadd fast float %elt1, %elt0 633 %add2 = fadd fast float %elt2, %add1 634 %add3 = fadd fast float %elt3, %add2 635 636 ret float %add3 637} 638