; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,GFX8_9_10,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,GFX8_9_10,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GFX9_10,GFX8_9_10,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck --check-prefixes=EG,FUNC %s

; Tests instruction selection of integer min patterns (icmp + select) into
; scalar/vector min instructions across AMDGPU generations and R600.

; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; GCN: v_min_i32_e32

; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp sle <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; GCN: s_min_i32
; GCN: s_min_i32
; GCN: s_min_i32
; GCN: s_min_i32

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
  %cmp = icmp sle <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out
  ret void
}

; The [8 x i32] padding arguments force %a and %b into separate kernarg
; dwords so each is fetched with its own scalar load.
; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; GCN: s_load_dword
; GCN: s_load_dword
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: s_min_i32
define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, [8 x i32], i8 %a, [8 x i32], i8 %b) #0 {
  %cmp = icmp sle i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; FIXME: Why vector and sdwa for last element?
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
; GCN-DAG: s_load_dwordx2
; GCN-DAG: s_load_dword s
; GCN-DAG: s_load_dword s
; GCN-NOT: _load_

; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32

; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32
; VI: v_min_i32_sdwa

; GFX9_10: v_min_i16
; GFX9_10: v_min_i16
; GFX9_10: v_min_i16
; GFX9_10: v_min_i16

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, [8 x i32], <4 x i8> %a, [8 x i32], <4 x i8> %b) #0 {
  %cmp = icmp sle <4 x i8> %a, %b
  %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %val, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v2i16:
; GCN: s_load_dword s
; GCN: s_load_dword s

; SI: s_ashr_i32
; SI: s_ashr_i32
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
; SI: s_min_i32
; SI: s_min_i32

; VI: s_sext_i32_i16
; VI: s_sext_i32_i16
; VI: s_min_i32
; VI: s_min_i32

; GFX9_10: v_pk_min_i16

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
; SI-NOT: buffer_load
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32

; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32

; GFX9_10: v_pk_min_i16
; GFX9_10: v_pk_min_i16

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
  %cmp = icmp sle <4 x i16> %a, %b
  %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %val, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @v_test_imin_slt_i32
; GCN: v_min_i32_e32

; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: @v_test_imin_slt_i16
; SI: v_min_i32_e32

; NOTE(review): no RUN line above passes the GFX8_9 prefix to FileCheck, so
; the check on the next line is never verified. TODO confirm the expected
; tonga/gfx900 output and add GFX8_9 to those RUN lines' prefix lists.
; GFX8_9: v_min_i16_e32
; GFX10: v_min_i16

; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid

  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %cmp = icmp slt i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: @s_test_imin_slt_i32
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; GCN: s_min_i32
; GCN: s_min_i32

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
  %cmp = icmp slt <2 x i32> %a, %b
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8

; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp slt i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8

; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp sle i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ule_i32
; GCN: v_min_u32_e32

; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ule_v3i32
; GCN: v_min_u32_e32
; GCN: v_min_u32_e32
; GCN: v_min_u32_e32
; GCN-NOT: v_min_u32_e32
; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid

  %a = load <3 x i32>, <3 x i32> addrspace(1)* %a.gep
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out.gep
  ret void
}

; FIXME: Reduce unused packed component to scalar
; FUNC-LABEL: @v_test_umin_ule_v3i16{{$}}
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI-NOT: v_min_u32_e32

; VI: v_min_u16_e32
; VI: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_min_u16_e32
; VI-NOT: v_min_u16

; GFX9_10: v_pk_min_u16
; GFX9_10: v_pk_min_u16

; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid

  %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.gep
  %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i16> %a, %b
  %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store <3 x i16> %val, <3 x i16> addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: @s_test_umin_ule_i32
; GCN: s_min_u32

; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i32
; GCN: v_min_u32_e32

; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
; SI: {{buffer|flat|global}}_load_ubyte
; SI: {{buffer|flat|global}}_load_ubyte
; SI: v_min_u32_e32

; GFX8_9_10: {{flat|global}}_load_ubyte
; GFX8_9_10: {{flat|global}}_load_ubyte
; NOTE(review): no RUN line above passes the GFX8_9 prefix to FileCheck, so
; the check on the next line is never verified. TODO confirm the expected
; tonga/gfx900 output and add GFX8_9 to those RUN lines' prefix lists.
; GFX8_9: v_min_u16_e32
; GFX10: v_min_u16

; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid

  %a = load i8, i8 addrspace(1)* %a.gep, align 1
  %b = load i8, i8 addrspace(1)* %b.gep, align 1
  %cmp = icmp ult i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out.gep, align 1
  ret void
}

; FUNC-LABEL: @s_test_umin_ult_i32
; GCN: s_min_u32

; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; When the compare result has a second use, min formation should be
; suppressed (the compare must survive for the i1 store).
; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
; SI-NOT: v_min
; GCN: v_cmp_lt_u32
; SI-NOT: v_min
; SI: v_cndmask_b32
; SI-NOT: v_min
; GCN: s_endpgm

; EG-NOT: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i16_multi_use
; GCN-NOT: v_min
; GCN: v_cmp_lt_u32
; GCN: v_cndmask_b32
; GCN-NOT: v_min
; GCN: s_endpgm

; EG-NOT: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %a = load i16, i16 addrspace(1)* %aptr, align 2
  %b = load i16, i16 addrspace(1)* %bptr, align 2
  %cmp = icmp ult i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out0, align 2
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}


; FUNC-LABEL: @s_test_umin_ult_v1i32
; GCN: s_min_u32

; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp ult <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
  %cmp = icmp ult <8 x i32> %a, %b
  %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %val, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
; GCN-NOT: {{buffer|flat|global}}_load
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32

; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
  %cmp = icmp ult <8 x i16> %a, %b
  %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %val, <8 x i16> addrspace(1)* %out
  ret void
}

; Make sure redundant and removed
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; GCN: s_min_u32 [[MIN:s[0-9]+]], s{{[0-9]}}, s{{[0-9]}}
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], s{{[0-9]}}
; GCN: buffer_store_dword [[VMIN]]

; EG: MIN_UINT
define amdgpu_kernel void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) #0 {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ult i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure redundant sign_extend_inreg removed.

; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; GCN-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
; GCN-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]

; GCN: s_min_i32 [[MIN:s[0-9]+]], [[EXT_A]], [[EXT_B]]
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; GCN: buffer_store_dword [[VMIN]]

; EG: MIN_INT
define amdgpu_kernel void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) #0 {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp slt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; GCN: s_min_i32

; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
  %cmp = icmp sle i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

; 64 bit
; FUNC-LABEL: {{^}}test_umin_ult_i64
; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ult i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_umin_ule_i64
; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ule i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imin_slt_i64
; GCN: s_endpgm

; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
define amdgpu_kernel void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp slt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imin_sle_i64
; GCN: s_endpgm

; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp sle i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_test_imin_sle_v2i16:
; SI: v_min_i32
; SI: v_min_i32

; VI: v_min_i16
; VI: v_min_i16

; GFX9_10: v_pk_min_i16

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; FIXME: i16 min
; FUNC-LABEL: {{^}}v_test_imin_ule_v2i16:
; SI: v_min_u32
; SI: v_min_u32

; VI: v_min_u16
; VI: v_min_u16

; GFX9_10: v_pk_min_u16

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }