1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s 3 4define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 { 5; GFX6-LABEL: v_bfe_i32_arg_arg_arg: 6; GFX6: ; %bb.0: 7; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8; GFX6-NEXT: v_bfe_i32 v0, v0, v1, v2 9; GFX6-NEXT: s_setpc_b64 s[30:31] 10 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2) 11 ret i32 %bfe_i32 12} 13 14define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 { 15; GFX6-LABEL: s_bfe_i32_arg_arg_arg: 16; GFX6: ; %bb.0: 17; GFX6-NEXT: s_and_b32 s1, s1, 63 18; GFX6-NEXT: s_lshl_b32 s2, s2, 16 19; GFX6-NEXT: s_or_b32 s1, s1, s2 20; GFX6-NEXT: s_bfe_i32 s0, s0, s1 21; GFX6-NEXT: ; return to shader part epilog 22 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2) 23 ret i32 %bfe_i32 24} 25 26; TODO: Need to expand this 27; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 { 28; %bfe_i64 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2) 29; ret i64 %bfe_i64 30; } 31 32define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 { 33; GFX6-LABEL: s_bfe_i64_arg_arg_arg: 34; GFX6: ; %bb.0: 35; GFX6-NEXT: s_and_b32 s2, s2, 63 36; GFX6-NEXT: s_lshl_b32 s3, s3, 16 37; GFX6-NEXT: s_or_b32 s2, s2, s3 38; GFX6-NEXT: s_bfe_i64 s[0:1], s[0:1], s2 39; GFX6-NEXT: ; return to shader part epilog 40 %bfe_i32 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2) 41 ret i64 %bfe_i32 42} 43 44define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { 45; GFX6-LABEL: bfe_i32_arg_arg_imm: 46; GFX6: ; %bb.0: 47; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 48; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 49; GFX6-NEXT: 
s_load_dword s0, s[0:1], 0xc 50; GFX6-NEXT: s_mov_b32 s6, -1 51; GFX6-NEXT: s_mov_b32 s7, 0xf000 52; GFX6-NEXT: s_waitcnt lgkmcnt(0) 53; GFX6-NEXT: s_and_b32 s0, s0, 63 54; GFX6-NEXT: s_or_b32 s0, s0, 0x7b0000 55; GFX6-NEXT: s_bfe_i32 s0, s2, s0 56; GFX6-NEXT: v_mov_b32_e32 v0, s0 57; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 58; GFX6-NEXT: s_endpgm 59 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123) 60 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 61 ret void 62} 63 64define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { 65; GFX6-LABEL: bfe_i32_arg_imm_arg: 66; GFX6: ; %bb.0: 67; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 68; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 69; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 70; GFX6-NEXT: s_mov_b32 s6, -1 71; GFX6-NEXT: s_mov_b32 s7, 0xf000 72; GFX6-NEXT: s_waitcnt lgkmcnt(0) 73; GFX6-NEXT: s_lshl_b32 s0, s0, 16 74; GFX6-NEXT: s_or_b32 s0, 59, s0 75; GFX6-NEXT: s_bfe_i32 s0, s2, s0 76; GFX6-NEXT: v_mov_b32_e32 v0, s0 77; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 78; GFX6-NEXT: s_endpgm 79 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2) 80 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 81 ret void 82} 83 84define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { 85; GFX6-LABEL: bfe_i32_imm_arg_arg: 86; GFX6: ; %bb.0: 87; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 88; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 89; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 90; GFX6-NEXT: s_mov_b32 s6, -1 91; GFX6-NEXT: s_mov_b32 s7, 0xf000 92; GFX6-NEXT: s_waitcnt lgkmcnt(0) 93; GFX6-NEXT: s_and_b32 s1, s2, 63 94; GFX6-NEXT: s_lshl_b32 s0, s0, 16 95; GFX6-NEXT: s_or_b32 s0, s1, s0 96; GFX6-NEXT: s_bfe_i32 s0, 0x7b, s0 97; GFX6-NEXT: v_mov_b32_e32 v0, s0 98; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 99; GFX6-NEXT: s_endpgm 100 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 
%src1, i32 %src2) 101 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 102 ret void 103} 104 105define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 { 106; GFX6-LABEL: v_bfe_print_arg: 107; GFX6: ; %bb.0: 108; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 109; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 110; GFX6-NEXT: s_mov_b32 s6, -1 111; GFX6-NEXT: s_mov_b32 s7, 0xf000 112; GFX6-NEXT: s_waitcnt lgkmcnt(0) 113; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 114; GFX6-NEXT: s_waitcnt lgkmcnt(0) 115; GFX6-NEXT: s_bfe_i32 s0, s0, 0x80002 116; GFX6-NEXT: v_mov_b32_e32 v0, s0 117; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 118; GFX6-NEXT: s_endpgm 119 %load = load i32, i32 addrspace(1)* %src0, align 4 120 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8) 121 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 122 ret void 123} 124 125define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { 126; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset: 127; GFX6: ; %bb.0: 128; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 129; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 130; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 131; GFX6-NEXT: s_mov_b32 s6, -1 132; GFX6-NEXT: s_mov_b32 s7, 0xf000 133; GFX6-NEXT: s_waitcnt lgkmcnt(0) 134; GFX6-NEXT: s_and_b32 s0, s0, 63 135; GFX6-NEXT: s_bfe_i32 s0, s2, s0 136; GFX6-NEXT: v_mov_b32_e32 v0, s0 137; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 138; GFX6-NEXT: s_endpgm 139 %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0) 140 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 141 ret void 142} 143 144define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { 145; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset: 146; GFX6: ; %bb.0: 147; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 148; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 149; GFX6-NEXT: s_mov_b32 s6, -1 150; 
GFX6-NEXT: s_mov_b32 s7, 0xf000 151; GFX6-NEXT: s_waitcnt lgkmcnt(0) 152; GFX6-NEXT: s_bfe_i32 s0, s0, 8 153; GFX6-NEXT: v_mov_b32_e32 v0, s0 154; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 155; GFX6-NEXT: s_endpgm 156 %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0) 157 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 158 ret void 159} 160 161define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 162; GFX6-LABEL: bfe_i32_test_6: 163; GFX6: ; %bb.0: 164; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 165; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 166; GFX6-NEXT: s_mov_b32 s6, -1 167; GFX6-NEXT: s_mov_b32 s7, 0xf000 168; GFX6-NEXT: s_waitcnt lgkmcnt(0) 169; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 170; GFX6-NEXT: s_waitcnt lgkmcnt(0) 171; GFX6-NEXT: s_lshl_b32 s0, s0, 31 172; GFX6-NEXT: s_bfe_i32 s0, s0, 0x1f0001 173; GFX6-NEXT: v_mov_b32_e32 v0, s0 174; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 175; GFX6-NEXT: s_endpgm 176 %x = load i32, i32 addrspace(1)* %in, align 4 177 %shl = shl i32 %x, 31 178 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31) 179 store i32 %bfe, i32 addrspace(1)* %out, align 4 180 ret void 181} 182 183define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 184; GFX6-LABEL: bfe_i32_test_7: 185; GFX6: ; %bb.0: 186; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 187; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 188; GFX6-NEXT: s_mov_b32 s6, -1 189; GFX6-NEXT: s_mov_b32 s7, 0xf000 190; GFX6-NEXT: s_waitcnt lgkmcnt(0) 191; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 192; GFX6-NEXT: s_waitcnt lgkmcnt(0) 193; GFX6-NEXT: s_lshl_b32 s0, s0, 31 194; GFX6-NEXT: s_bfe_i32 s0, s0, 0x1f0000 195; GFX6-NEXT: v_mov_b32_e32 v0, s0 196; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 197; GFX6-NEXT: s_endpgm 198 %x = load i32, i32 addrspace(1)* %in, align 4 199 %shl = shl i32 %x, 31 200 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, 
i32 31) 201 store i32 %bfe, i32 addrspace(1)* %out, align 4 202 ret void 203} 204 205define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 206; GFX6-LABEL: bfe_i32_test_8: 207; GFX6: ; %bb.0: 208; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 209; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 210; GFX6-NEXT: s_mov_b32 s6, -1 211; GFX6-NEXT: s_mov_b32 s7, 0xf000 212; GFX6-NEXT: s_waitcnt lgkmcnt(0) 213; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 214; GFX6-NEXT: s_waitcnt lgkmcnt(0) 215; GFX6-NEXT: s_lshl_b32 s0, s0, 31 216; GFX6-NEXT: s_bfe_i32 s0, s0, 0x1001f 217; GFX6-NEXT: v_mov_b32_e32 v0, s0 218; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 219; GFX6-NEXT: s_endpgm 220 %x = load i32, i32 addrspace(1)* %in, align 4 221 %shl = shl i32 %x, 31 222 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) 223 store i32 %bfe, i32 addrspace(1)* %out, align 4 224 ret void 225} 226 227define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 228; GFX6-LABEL: bfe_i32_test_9: 229; GFX6: ; %bb.0: 230; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 231; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 232; GFX6-NEXT: s_mov_b32 s6, -1 233; GFX6-NEXT: s_mov_b32 s7, 0xf000 234; GFX6-NEXT: s_waitcnt lgkmcnt(0) 235; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 236; GFX6-NEXT: s_waitcnt lgkmcnt(0) 237; GFX6-NEXT: s_bfe_i32 s0, s0, 0x1001f 238; GFX6-NEXT: v_mov_b32_e32 v0, s0 239; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 240; GFX6-NEXT: s_endpgm 241 %x = load i32, i32 addrspace(1)* %in, align 4 242 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1) 243 store i32 %bfe, i32 addrspace(1)* %out, align 4 244 ret void 245} 246 247define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 248; GFX6-LABEL: bfe_i32_test_10: 249; GFX6: ; %bb.0: 250; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 251; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 252; GFX6-NEXT: 
s_mov_b32 s6, -1 253; GFX6-NEXT: s_mov_b32 s7, 0xf000 254; GFX6-NEXT: s_waitcnt lgkmcnt(0) 255; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 256; GFX6-NEXT: s_waitcnt lgkmcnt(0) 257; GFX6-NEXT: s_bfe_i32 s0, s0, 0x1f0001 258; GFX6-NEXT: v_mov_b32_e32 v0, s0 259; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 260; GFX6-NEXT: s_endpgm 261 %x = load i32, i32 addrspace(1)* %in, align 4 262 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31) 263 store i32 %bfe, i32 addrspace(1)* %out, align 4 264 ret void 265} 266 267define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 268; GFX6-LABEL: bfe_i32_test_11: 269; GFX6: ; %bb.0: 270; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 271; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 272; GFX6-NEXT: s_mov_b32 s6, -1 273; GFX6-NEXT: s_mov_b32 s7, 0xf000 274; GFX6-NEXT: s_waitcnt lgkmcnt(0) 275; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 276; GFX6-NEXT: s_waitcnt lgkmcnt(0) 277; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180008 278; GFX6-NEXT: v_mov_b32_e32 v0, s0 279; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 280; GFX6-NEXT: s_endpgm 281 %x = load i32, i32 addrspace(1)* %in, align 4 282 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24) 283 store i32 %bfe, i32 addrspace(1)* %out, align 4 284 ret void 285} 286 287define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 288; GFX6-LABEL: bfe_i32_test_12: 289; GFX6: ; %bb.0: 290; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 291; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 292; GFX6-NEXT: s_mov_b32 s6, -1 293; GFX6-NEXT: s_mov_b32 s7, 0xf000 294; GFX6-NEXT: s_waitcnt lgkmcnt(0) 295; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 296; GFX6-NEXT: s_waitcnt lgkmcnt(0) 297; GFX6-NEXT: s_bfe_i32 s0, s0, 0x80018 298; GFX6-NEXT: v_mov_b32_e32 v0, s0 299; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 300; GFX6-NEXT: s_endpgm 301 %x = load i32, i32 addrspace(1)* %in, align 4 302 %bfe = call i32 
@llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8) 303 store i32 %bfe, i32 addrspace(1)* %out, align 4 304 ret void 305} 306 307define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 308; GFX6-LABEL: bfe_i32_test_13: 309; GFX6: ; %bb.0: 310; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 311; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 312; GFX6-NEXT: s_mov_b32 s6, -1 313; GFX6-NEXT: s_mov_b32 s7, 0xf000 314; GFX6-NEXT: s_waitcnt lgkmcnt(0) 315; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 316; GFX6-NEXT: s_waitcnt lgkmcnt(0) 317; GFX6-NEXT: s_ashr_i32 s0, s0, 31 318; GFX6-NEXT: s_bfe_i32 s0, s0, 0x1001f 319; GFX6-NEXT: v_mov_b32_e32 v0, s0 320; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 321; GFX6-NEXT: s_endpgm 322 %x = load i32, i32 addrspace(1)* %in, align 4 323 %shl = ashr i32 %x, 31 324 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) 325 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void 326} 327 328define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 329; GFX6-LABEL: bfe_i32_test_14: 330; GFX6: ; %bb.0: 331; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 332; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 333; GFX6-NEXT: s_mov_b32 s6, -1 334; GFX6-NEXT: s_mov_b32 s7, 0xf000 335; GFX6-NEXT: s_waitcnt lgkmcnt(0) 336; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 337; GFX6-NEXT: s_waitcnt lgkmcnt(0) 338; GFX6-NEXT: s_lshr_b32 s0, s0, 31 339; GFX6-NEXT: s_bfe_i32 s0, s0, 0x1001f 340; GFX6-NEXT: v_mov_b32_e32 v0, s0 341; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 342; GFX6-NEXT: s_endpgm 343 %x = load i32, i32 addrspace(1)* %in, align 4 344 %shl = lshr i32 %x, 31 345 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) 346 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void 347} 348 349define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { 350; GFX6-LABEL: bfe_i32_constant_fold_test_0: 351; GFX6: ; %bb.0: 352; GFX6-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x9 353; GFX6-NEXT: s_bfe_i32 s2, 0, 0 354; GFX6-NEXT: v_mov_b32_e32 v0, s2 355; GFX6-NEXT: s_mov_b32 s2, -1 356; GFX6-NEXT: s_mov_b32 s3, 0xf000 357; GFX6-NEXT: s_waitcnt lgkmcnt(0) 358; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 359; GFX6-NEXT: s_endpgm 360 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0) 361 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 362 ret void 363} 364 365define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { 366; GFX6-LABEL: bfe_i32_constant_fold_test_1: 367; GFX6: ; %bb.0: 368; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 369; GFX6-NEXT: s_bfe_i32 s2, 0x302e, 0 370; GFX6-NEXT: v_mov_b32_e32 v0, s2 371; GFX6-NEXT: s_mov_b32 s2, -1 372; GFX6-NEXT: s_mov_b32 s3, 0xf000 373; GFX6-NEXT: s_waitcnt lgkmcnt(0) 374; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 375; GFX6-NEXT: s_endpgm 376 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0) 377 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 378 ret void 379} 380 381define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { 382; GFX6-LABEL: bfe_i32_constant_fold_test_2: 383; GFX6: ; %bb.0: 384; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 385; GFX6-NEXT: s_bfe_i32 s2, 0, 0x10000 386; GFX6-NEXT: v_mov_b32_e32 v0, s2 387; GFX6-NEXT: s_mov_b32 s2, -1 388; GFX6-NEXT: s_mov_b32 s3, 0xf000 389; GFX6-NEXT: s_waitcnt lgkmcnt(0) 390; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 391; GFX6-NEXT: s_endpgm 392 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1) 393 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 394 ret void 395} 396 397define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { 398; GFX6-LABEL: bfe_i32_constant_fold_test_3: 399; GFX6: ; %bb.0: 400; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 401; GFX6-NEXT: s_bfe_i32 s2, 1, 0x10000 402; GFX6-NEXT: v_mov_b32_e32 v0, s2 403; GFX6-NEXT: s_mov_b32 s2, -1 404; 
GFX6-NEXT: s_mov_b32 s3, 0xf000 405; GFX6-NEXT: s_waitcnt lgkmcnt(0) 406; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 407; GFX6-NEXT: s_endpgm 408 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1) 409 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 410 ret void 411} 412 413define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { 414; GFX6-LABEL: bfe_i32_constant_fold_test_4: 415; GFX6: ; %bb.0: 416; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 417; GFX6-NEXT: s_bfe_i32 s2, -1, 0x10000 418; GFX6-NEXT: v_mov_b32_e32 v0, s2 419; GFX6-NEXT: s_mov_b32 s2, -1 420; GFX6-NEXT: s_mov_b32 s3, 0xf000 421; GFX6-NEXT: s_waitcnt lgkmcnt(0) 422; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 423; GFX6-NEXT: s_endpgm 424 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1) 425 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 426 ret void 427} 428 429define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { 430; GFX6-LABEL: bfe_i32_constant_fold_test_5: 431; GFX6: ; %bb.0: 432; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 433; GFX6-NEXT: s_mov_b32 s2, 0x10007 434; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2 435; GFX6-NEXT: v_mov_b32_e32 v0, s2 436; GFX6-NEXT: s_mov_b32 s2, -1 437; GFX6-NEXT: s_mov_b32 s3, 0xf000 438; GFX6-NEXT: s_waitcnt lgkmcnt(0) 439; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 440; GFX6-NEXT: s_endpgm 441 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1) 442 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 443 ret void 444} 445 446define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { 447; GFX6-LABEL: bfe_i32_constant_fold_test_6: 448; GFX6: ; %bb.0: 449; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 450; GFX6-NEXT: s_mov_b32 s2, 0x80000 451; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2 452; GFX6-NEXT: v_mov_b32_e32 v0, s2 453; GFX6-NEXT: s_mov_b32 s2, -1 454; GFX6-NEXT: s_mov_b32 s3, 0xf000 455; GFX6-NEXT: s_waitcnt lgkmcnt(0) 
456; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 457; GFX6-NEXT: s_endpgm 458 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8) 459 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 460 ret void 461} 462 463define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { 464; GFX6-LABEL: bfe_i32_constant_fold_test_7: 465; GFX6: ; %bb.0: 466; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 467; GFX6-NEXT: s_mov_b32 s2, 0x80000 468; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2 469; GFX6-NEXT: v_mov_b32_e32 v0, s2 470; GFX6-NEXT: s_mov_b32 s2, -1 471; GFX6-NEXT: s_mov_b32 s3, 0xf000 472; GFX6-NEXT: s_waitcnt lgkmcnt(0) 473; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 474; GFX6-NEXT: s_endpgm 475 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8) 476 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 477 ret void 478} 479 480define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { 481; GFX6-LABEL: bfe_i32_constant_fold_test_8: 482; GFX6: ; %bb.0: 483; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 484; GFX6-NEXT: s_mov_b32 s2, 0x80006 485; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2 486; GFX6-NEXT: v_mov_b32_e32 v0, s2 487; GFX6-NEXT: s_mov_b32 s2, -1 488; GFX6-NEXT: s_mov_b32 s3, 0xf000 489; GFX6-NEXT: s_waitcnt lgkmcnt(0) 490; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 491; GFX6-NEXT: s_endpgm 492 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8) 493 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 494 ret void 495} 496 497define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { 498; GFX6-LABEL: bfe_i32_constant_fold_test_9: 499; GFX6: ; %bb.0: 500; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 501; GFX6-NEXT: s_mov_b32 s2, 0x80010 502; GFX6-NEXT: s_bfe_i32 s2, 0x10000, s2 503; GFX6-NEXT: v_mov_b32_e32 v0, s2 504; GFX6-NEXT: s_mov_b32 s2, -1 505; GFX6-NEXT: s_mov_b32 s3, 0xf000 506; GFX6-NEXT: s_waitcnt lgkmcnt(0) 507; GFX6-NEXT: buffer_store_dword 
v0, off, s[0:3], 0 508; GFX6-NEXT: s_endpgm 509 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8) 510 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 511 ret void 512} 513 514define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { 515; GFX6-LABEL: bfe_i32_constant_fold_test_10: 516; GFX6: ; %bb.0: 517; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 518; GFX6-NEXT: s_mov_b32 s2, 0x100010 519; GFX6-NEXT: s_bfe_i32 s2, 0xffff, s2 520; GFX6-NEXT: v_mov_b32_e32 v0, s2 521; GFX6-NEXT: s_mov_b32 s2, -1 522; GFX6-NEXT: s_mov_b32 s3, 0xf000 523; GFX6-NEXT: s_waitcnt lgkmcnt(0) 524; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 525; GFX6-NEXT: s_endpgm 526 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16) 527 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 528 ret void 529} 530 531define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { 532; GFX6-LABEL: bfe_i32_constant_fold_test_11: 533; GFX6: ; %bb.0: 534; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 535; GFX6-NEXT: s_mov_b32 s2, 0x40004 536; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 537; GFX6-NEXT: v_mov_b32_e32 v0, s2 538; GFX6-NEXT: s_mov_b32 s2, -1 539; GFX6-NEXT: s_mov_b32 s3, 0xf000 540; GFX6-NEXT: s_waitcnt lgkmcnt(0) 541; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 542; GFX6-NEXT: s_endpgm 543 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4) 544 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 545 ret void 546} 547 548define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { 549; GFX6-LABEL: bfe_i32_constant_fold_test_12: 550; GFX6: ; %bb.0: 551; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 552; GFX6-NEXT: s_mov_b32 s2, 0x1001f 553; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 554; GFX6-NEXT: v_mov_b32_e32 v0, s2 555; GFX6-NEXT: s_mov_b32 s2, -1 556; GFX6-NEXT: s_mov_b32 s3, 0xf000 557; GFX6-NEXT: s_waitcnt lgkmcnt(0) 558; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 559; 
GFX6-NEXT: s_endpgm 560 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1) 561 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 562 ret void 563} 564 565define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { 566; GFX6-LABEL: bfe_i32_constant_fold_test_13: 567; GFX6: ; %bb.0: 568; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 569; GFX6-NEXT: s_mov_b32 s2, 0x100010 570; GFX6-NEXT: s_bfe_i32 s2, 0x1fffe, s2 571; GFX6-NEXT: v_mov_b32_e32 v0, s2 572; GFX6-NEXT: s_mov_b32 s2, -1 573; GFX6-NEXT: s_mov_b32 s3, 0xf000 574; GFX6-NEXT: s_waitcnt lgkmcnt(0) 575; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 576; GFX6-NEXT: s_endpgm 577 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16) 578 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 579 ret void 580} 581 582define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { 583; GFX6-LABEL: bfe_i32_constant_fold_test_14: 584; GFX6: ; %bb.0: 585; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 586; GFX6-NEXT: s_mov_b32 s2, 0x1e0002 587; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 588; GFX6-NEXT: v_mov_b32_e32 v0, s2 589; GFX6-NEXT: s_mov_b32 s2, -1 590; GFX6-NEXT: s_mov_b32 s3, 0xf000 591; GFX6-NEXT: s_waitcnt lgkmcnt(0) 592; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 593; GFX6-NEXT: s_endpgm 594 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30) 595 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 596 ret void 597} 598 599define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { 600; GFX6-LABEL: bfe_i32_constant_fold_test_15: 601; GFX6: ; %bb.0: 602; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 603; GFX6-NEXT: s_mov_b32 s2, 0x1c0004 604; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 605; GFX6-NEXT: v_mov_b32_e32 v0, s2 606; GFX6-NEXT: s_mov_b32 s2, -1 607; GFX6-NEXT: s_mov_b32 s3, 0xf000 608; GFX6-NEXT: s_waitcnt lgkmcnt(0) 609; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 610; GFX6-NEXT: s_endpgm 
611 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28) 612 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 613 ret void 614} 615 616define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { 617; GFX6-LABEL: bfe_i32_constant_fold_test_16: 618; GFX6: ; %bb.0: 619; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 620; GFX6-NEXT: s_bfe_i32 s2, -1, 0x70001 621; GFX6-NEXT: v_mov_b32_e32 v0, s2 622; GFX6-NEXT: s_mov_b32 s2, -1 623; GFX6-NEXT: s_mov_b32 s3, 0xf000 624; GFX6-NEXT: s_waitcnt lgkmcnt(0) 625; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 626; GFX6-NEXT: s_endpgm 627 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7) 628 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 629 ret void 630} 631 632define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { 633; GFX6-LABEL: bfe_i32_constant_fold_test_17: 634; GFX6: ; %bb.0: 635; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 636; GFX6-NEXT: s_mov_b32 s2, 0x1f0001 637; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2 638; GFX6-NEXT: v_mov_b32_e32 v0, s2 639; GFX6-NEXT: s_mov_b32 s2, -1 640; GFX6-NEXT: s_mov_b32 s3, 0xf000 641; GFX6-NEXT: s_waitcnt lgkmcnt(0) 642; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 643; GFX6-NEXT: s_endpgm 644 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31) 645 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 646 ret void 647} 648 649define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { 650; GFX6-LABEL: bfe_i32_constant_fold_test_18: 651; GFX6: ; %bb.0: 652; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 653; GFX6-NEXT: s_mov_b32 s2, 0x1001f 654; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2 655; GFX6-NEXT: v_mov_b32_e32 v0, s2 656; GFX6-NEXT: s_mov_b32 s2, -1 657; GFX6-NEXT: s_mov_b32 s3, 0xf000 658; GFX6-NEXT: s_waitcnt lgkmcnt(0) 659; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 660; GFX6-NEXT: s_endpgm 661 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 
31, i32 1) 662 store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 663 ret void 664} 665 666define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 667; GFX6-LABEL: bfe_sext_in_reg_i24: 668; GFX6: ; %bb.0: 669; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 670; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 671; GFX6-NEXT: s_mov_b32 s6, -1 672; GFX6-NEXT: s_mov_b32 s7, 0xf000 673; GFX6-NEXT: s_waitcnt lgkmcnt(0) 674; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 675; GFX6-NEXT: s_waitcnt lgkmcnt(0) 676; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000 677; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000 678; GFX6-NEXT: v_mov_b32_e32 v0, s0 679; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 680; GFX6-NEXT: s_endpgm 681 %x = load i32, i32 addrspace(1)* %in, align 4 682 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24) 683 %shl = shl i32 %bfe, 8 684 %ashr = ashr i32 %shl, 8 685 store i32 %ashr, i32 addrspace(1)* %out, align 4 686 ret void 687} 688 689define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 690; GFX6-LABEL: simplify_demanded_bfe_sdiv: 691; GFX6: ; %bb.0: 692; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0 693; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb 694; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 695; GFX6-NEXT: s_mov_b32 s6, -1 696; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 697; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 698; GFX6-NEXT: s_waitcnt lgkmcnt(0) 699; GFX6-NEXT: s_load_dword s2, s[2:3], 0x0 700; GFX6-NEXT: s_mov_b32 s7, 0xf000 701; GFX6-NEXT: v_mul_lo_u32 v1, -2, v0 702; GFX6-NEXT: s_waitcnt lgkmcnt(0) 703; GFX6-NEXT: s_bfe_i32 s2, s2, 0x100001 704; GFX6-NEXT: s_ashr_i32 s3, s2, 31 705; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 706; GFX6-NEXT: s_add_i32 s2, s2, s3 707; GFX6-NEXT: s_xor_b32 s2, s2, s3 708; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 709; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 710; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0 711; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 
712; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 713; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1 714; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 715; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1 716; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 717; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 718; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1 719; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 720; GFX6-NEXT: v_xor_b32_e32 v0, s3, v0 721; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 722; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 723; GFX6-NEXT: s_endpgm 724 %src = load i32, i32 addrspace(1)* %in, align 4 725 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16) 726 %div = sdiv i32 %bfe, 2 727 store i32 %div, i32 addrspace(1)* %out, align 4 728 ret void 729} 730 731define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { 732; GFX6-LABEL: bfe_0_width: 733; GFX6: ; %bb.0: 734; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 735; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 736; GFX6-NEXT: s_mov_b32 s6, -1 737; GFX6-NEXT: s_mov_b32 s7, 0xf000 738; GFX6-NEXT: s_waitcnt lgkmcnt(0) 739; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 740; GFX6-NEXT: s_waitcnt lgkmcnt(0) 741; GFX6-NEXT: s_bfe_i32 s0, s0, 8 742; GFX6-NEXT: v_mov_b32_e32 v0, s0 743; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 744; GFX6-NEXT: s_endpgm 745 %load = load i32, i32 addrspace(1)* %ptr, align 4 746 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0) 747 store i32 %bfe, i32 addrspace(1)* %out, align 4 748 ret void 749} 750 751define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { 752; GFX6-LABEL: bfe_8_bfe_8: 753; GFX6: ; %bb.0: 754; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 755; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 756; GFX6-NEXT: s_mov_b32 s6, -1 757; GFX6-NEXT: s_mov_b32 s7, 0xf000 758; GFX6-NEXT: s_waitcnt lgkmcnt(0) 759; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 760; GFX6-NEXT: s_mov_b32 s1, 0x80000 761; 
GFX6-NEXT: s_waitcnt lgkmcnt(0) 762; GFX6-NEXT: s_bfe_i32 s0, s0, s1 763; GFX6-NEXT: s_bfe_i32 s0, s0, s1 764; GFX6-NEXT: v_mov_b32_e32 v0, s0 765; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 766; GFX6-NEXT: s_endpgm 767 %load = load i32, i32 addrspace(1)* %ptr, align 4 768 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8) 769 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8) 770 store i32 %bfe1, i32 addrspace(1)* %out, align 4 771 ret void 772} 773 774define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { 775; GFX6-LABEL: bfe_8_bfe_16: 776; GFX6: ; %bb.0: 777; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 778; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 779; GFX6-NEXT: s_mov_b32 s6, -1 780; GFX6-NEXT: s_mov_b32 s7, 0xf000 781; GFX6-NEXT: s_waitcnt lgkmcnt(0) 782; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 783; GFX6-NEXT: s_waitcnt lgkmcnt(0) 784; GFX6-NEXT: s_bfe_i32 s0, s0, 0x80000 785; GFX6-NEXT: s_bfe_i32 s0, s0, 0x100000 786; GFX6-NEXT: v_mov_b32_e32 v0, s0 787; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 788; GFX6-NEXT: s_endpgm 789 %load = load i32, i32 addrspace(1)* %ptr, align 4 790 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8) 791 %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16) 792 store i32 %bfe1, i32 addrspace(1)* %out, align 4 793 ret void 794} 795 796; TODO: These two BFEs should really be folded into one 797define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { 798; GFX6-LABEL: bfe_16_bfe_8: 799; GFX6: ; %bb.0: 800; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 801; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 802; GFX6-NEXT: s_mov_b32 s6, -1 803; GFX6-NEXT: s_mov_b32 s7, 0xf000 804; GFX6-NEXT: s_waitcnt lgkmcnt(0) 805; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 806; GFX6-NEXT: s_waitcnt lgkmcnt(0) 807; GFX6-NEXT: s_bfe_i32 s0, s0, 0x100000 808; GFX6-NEXT: s_bfe_i32 s0, s0, 0x80000 809; GFX6-NEXT: v_mov_b32_e32 v0, s0 810; 
GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; Input: sbfe(%a + %b, 0, 8) followed by a shl/ashr pair by 24.
; NOTE(review): the recorded output below still contains both an s_bfe_i32
; and an s_sext_i32_i8 - presumably a fold that is not implemented yet on
; this path; confirm before relying on the sentence above.
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s2, s2, s0
; GFX6-NEXT: s_bfe_i32 s0, s2, 0x80000
; GFX6-NEXT: s_sext_i32_i8 s0, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; Same shape as sext_in_reg_i8_to_i32_bfe, but the two constant operands of
; the intrinsic are swapped (8, 0 instead of 0, 8). The recorded s_bfe_i32
; immediate is 8 rather than 0x80000, and the s_sext_i32_i8 is still present.
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s2, s2, s0
; GFX6-NEXT: s_bfe_i32 s0, s2, 8
; GFX6-NEXT: s_sext_i32_i8 s0, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 load sign-extended to i32, then sbfe(..., 0, 8), then shl/ashr by 24.
; The recorded output is a buffer_load_sbyte followed by two identical
; v_bfe_i32 v0, v0, 0, 8 instructions.
define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: sextload_i8_to_i32_bfe:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; Variant of sextload_i8_to_i32_bfe with the intrinsic's constant operands
; swapped (8, 0). The recorded output is v_bfe_i32 v0, v0, 8, 0 followed by
; v_bfe_i32 v0, v0, 0, 8.
define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: sextload_i8_to_i32_bfe_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 load, shl/ashr pair by 31, then sbfe(..., 0, 1).
; The recorded output contains two identical s_bfe_i32 s0, s0, 0x10000.
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 load, shl/ashr pair by 30, then sbfe(..., 1, 1).
; The recorded output is s_bfe_i32 with 0x20000 followed by 0x10001.
; NOTE(review): despite "_i1_" in the name, the shift amount is 30, the same
; as in sext_in_reg_i2_bfe_offset_1 - confirm this is intended.
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10001
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 load, shl/ashr pair by 30, then sbfe(..., 1, 2).
; The recorded output is s_bfe_i32 with 0x20000 followed by 0x20001.
define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20001
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed bitfield-extract intrinsics exercised by the tests in this file.
declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) #1

; #0 is attached to every test function; #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }