; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s

; Instruction-selection tests for the unsigned bitfield-extract intrinsics
; (@llvm.amdgcn.ubfe.i32 / .i64) going through GlobalISel on gfx6 (tahiti).
; The GFX6 check lines are autogenerated: edit the IR, then regenerate the
; checks with utils/update_llc_test_checks.py rather than editing them by hand.

define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s1, s1, 63
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

; TODO: Need to expand this.
; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
;   %bfe_i64 = call i32 @llvm.amdgcn.ubfe.i64(i32 %src0, i32 %src1, i32 %src2)
;   ret i64 %bfe_i64
; }

define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s2, s2, 63
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_or_b32 s2, s2, s3
; GFX6-NEXT:    s_bfe_u64 s[0:1], s[0:1], s2
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
  ret i64 %bfe_i32
}

; NOTE(review): %src2 is declared but unused — the call passes %src1 for both
; the offset and width operands.  TODO: confirm this is intentional before
; regenerating the checks.
define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s1, s0, 63
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, s1, s0
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_arg_imm:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_or_b32 s0, s0, 0x7b0000
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_arg_imm_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, 59, s0
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_imm_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s1, s2, 63
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, s1, s0
; GFX6-NEXT:    s_bfe_u32 s0, 0x7b, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zextload_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: Should be using s_add_i32
; NOTE(review): the FIXME above looks stale — the generated checks below do
; use s_add_i32.  TODO: confirm and drop it on the next regeneration.
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80003
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80007
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80008
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_5:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x180008
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80018
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; NOTE(review): the line above is not bound to any FileCheck prefix (GFX6) and
; so is inert — presumably a leftover from hand-written checks.  TODO: confirm
; it can be removed.
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_ashr_i32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_5:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x10007
; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80006
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80010
; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
; NOTE(review): SOURCE is truncated below, mid-call — the remaining operands of
; this call (and any following functions, declarations, and attribute groups)
; are not visible here and have NOT been reconstructed.
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32
16, i32 8) 788 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 789 ret void 790} 791 792define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { 793; GFX6-LABEL: bfe_u32_constant_fold_test_10: 794; GFX6: ; %bb.0: 795; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 796; GFX6-NEXT: s_mov_b32 s2, 0x100010 797; GFX6-NEXT: s_bfe_u32 s2, 0xffff, s2 798; GFX6-NEXT: v_mov_b32_e32 v0, s2 799; GFX6-NEXT: s_mov_b32 s2, -1 800; GFX6-NEXT: s_mov_b32 s3, 0xf000 801; GFX6-NEXT: s_waitcnt lgkmcnt(0) 802; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 803; GFX6-NEXT: s_endpgm 804 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16) 805 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 806 ret void 807} 808 809define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { 810; GFX6-LABEL: bfe_u32_constant_fold_test_11: 811; GFX6: ; %bb.0: 812; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 813; GFX6-NEXT: s_mov_b32 s2, 0x40004 814; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 815; GFX6-NEXT: v_mov_b32_e32 v0, s2 816; GFX6-NEXT: s_mov_b32 s2, -1 817; GFX6-NEXT: s_mov_b32 s3, 0xf000 818; GFX6-NEXT: s_waitcnt lgkmcnt(0) 819; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 820; GFX6-NEXT: s_endpgm 821 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4) 822 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 823 ret void 824} 825 826define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { 827; GFX6-LABEL: bfe_u32_constant_fold_test_12: 828; GFX6: ; %bb.0: 829; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 830; GFX6-NEXT: s_mov_b32 s2, 0x1001f 831; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 832; GFX6-NEXT: v_mov_b32_e32 v0, s2 833; GFX6-NEXT: s_mov_b32 s2, -1 834; GFX6-NEXT: s_mov_b32 s3, 0xf000 835; GFX6-NEXT: s_waitcnt lgkmcnt(0) 836; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 837; GFX6-NEXT: s_endpgm 838 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1) 839 store i32 
%bfe_u32, i32 addrspace(1)* %out, align 4 840 ret void 841} 842 843define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { 844; GFX6-LABEL: bfe_u32_constant_fold_test_13: 845; GFX6: ; %bb.0: 846; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 847; GFX6-NEXT: s_mov_b32 s2, 0x100010 848; GFX6-NEXT: s_bfe_u32 s2, 0x1fffe, s2 849; GFX6-NEXT: v_mov_b32_e32 v0, s2 850; GFX6-NEXT: s_mov_b32 s2, -1 851; GFX6-NEXT: s_mov_b32 s3, 0xf000 852; GFX6-NEXT: s_waitcnt lgkmcnt(0) 853; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 854; GFX6-NEXT: s_endpgm 855 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16) 856 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 857 ret void 858} 859 860define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { 861; GFX6-LABEL: bfe_u32_constant_fold_test_14: 862; GFX6: ; %bb.0: 863; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 864; GFX6-NEXT: s_mov_b32 s2, 0x1e0002 865; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 866; GFX6-NEXT: v_mov_b32_e32 v0, s2 867; GFX6-NEXT: s_mov_b32 s2, -1 868; GFX6-NEXT: s_mov_b32 s3, 0xf000 869; GFX6-NEXT: s_waitcnt lgkmcnt(0) 870; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 871; GFX6-NEXT: s_endpgm 872 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30) 873 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 874 ret void 875} 876 877define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { 878; GFX6-LABEL: bfe_u32_constant_fold_test_15: 879; GFX6: ; %bb.0: 880; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 881; GFX6-NEXT: s_mov_b32 s2, 0x1c0004 882; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 883; GFX6-NEXT: v_mov_b32_e32 v0, s2 884; GFX6-NEXT: s_mov_b32 s2, -1 885; GFX6-NEXT: s_mov_b32 s3, 0xf000 886; GFX6-NEXT: s_waitcnt lgkmcnt(0) 887; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 888; GFX6-NEXT: s_endpgm 889 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28) 890 store i32 %bfe_u32, i32 
addrspace(1)* %out, align 4 891 ret void 892} 893 894define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { 895; GFX6-LABEL: bfe_u32_constant_fold_test_16: 896; GFX6: ; %bb.0: 897; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 898; GFX6-NEXT: s_bfe_u32 s2, -1, 0x70001 899; GFX6-NEXT: v_mov_b32_e32 v0, s2 900; GFX6-NEXT: s_mov_b32 s2, -1 901; GFX6-NEXT: s_mov_b32 s3, 0xf000 902; GFX6-NEXT: s_waitcnt lgkmcnt(0) 903; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 904; GFX6-NEXT: s_endpgm 905 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7) 906 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 907 ret void 908} 909 910define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { 911; GFX6-LABEL: bfe_u32_constant_fold_test_17: 912; GFX6: ; %bb.0: 913; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 914; GFX6-NEXT: s_mov_b32 s2, 0x1f0001 915; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2 916; GFX6-NEXT: v_mov_b32_e32 v0, s2 917; GFX6-NEXT: s_mov_b32 s2, -1 918; GFX6-NEXT: s_mov_b32 s3, 0xf000 919; GFX6-NEXT: s_waitcnt lgkmcnt(0) 920; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 921; GFX6-NEXT: s_endpgm 922 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31) 923 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 924 ret void 925} 926 927define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { 928; GFX6-LABEL: bfe_u32_constant_fold_test_18: 929; GFX6: ; %bb.0: 930; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 931; GFX6-NEXT: s_mov_b32 s2, 0x1001f 932; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2 933; GFX6-NEXT: v_mov_b32_e32 v0, s2 934; GFX6-NEXT: s_mov_b32 s2, -1 935; GFX6-NEXT: s_mov_b32 s3, 0xf000 936; GFX6-NEXT: s_waitcnt lgkmcnt(0) 937; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 938; GFX6-NEXT: s_endpgm 939 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1) 940 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 941 ret void 942} 943 944; Make 
sure that SimplifyDemandedBits doesn't cause the and to be 945; reduced to the bits demanded by the bfe. 946 947; XXX: The operand to v_bfe_u32 could also just directly be the load register. 948define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, 949; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: 950; GFX6: ; %bb.0: 951; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 952; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb 953; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 954; GFX6-NEXT: s_mov_b32 s6, -1 955; GFX6-NEXT: s_mov_b32 s7, 0xf000 956; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] 957; GFX6-NEXT: s_waitcnt lgkmcnt(0) 958; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 959; GFX6-NEXT: s_waitcnt lgkmcnt(0) 960; GFX6-NEXT: s_and_b32 s0, s0, 63 961; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002 962; GFX6-NEXT: v_mov_b32_e32 v1, s1 963; GFX6-NEXT: v_mov_b32_e32 v0, s0 964; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 965; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 966; GFX6-NEXT: s_endpgm 967 i32 addrspace(1)* %out1, 968 i32 addrspace(1)* %in) #0 { 969 %src = load i32, i32 addrspace(1)* %in, align 4 970 %and = and i32 %src, 63 971 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2) 972 store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4 973 store i32 %and, i32 addrspace(1)* %out1, align 4 974 ret void 975} 976 977define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { 978; GFX6-LABEL: lshr_and: 979; GFX6: ; %bb.0: 980; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 981; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 982; GFX6-NEXT: s_mov_b32 s6, -1 983; GFX6-NEXT: s_mov_b32 s7, 0xf000 984; GFX6-NEXT: s_waitcnt lgkmcnt(0) 985; GFX6-NEXT: s_bfe_u32 s0, s0, 0x30006 986; GFX6-NEXT: v_mov_b32_e32 v0, s0 987; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 988; GFX6-NEXT: s_endpgm 989 %b = lshr i32 %a, 6 990 %c = and i32 %b, 7 991 store i32 %c, i32 addrspace(1)* %out, align 8 992 ret void 993} 994 995define amdgpu_kernel void 
@v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { 996; GFX6-LABEL: v_lshr_and: 997; GFX6: ; %bb.0: 998; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 999; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 1000; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 1001; GFX6-NEXT: s_mov_b32 s6, -1 1002; GFX6-NEXT: s_mov_b32 s7, 0xf000 1003; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1004; GFX6-NEXT: s_lshr_b32 s0, s2, s0 1005; GFX6-NEXT: s_and_b32 s0, s0, 7 1006; GFX6-NEXT: v_mov_b32_e32 v0, s0 1007; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1008; GFX6-NEXT: s_endpgm 1009 %c = lshr i32 %a, %b 1010 %d = and i32 %c, 7 1011 store i32 %d, i32 addrspace(1)* %out, align 8 1012 ret void 1013} 1014 1015define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { 1016; GFX6-LABEL: and_lshr: 1017; GFX6: ; %bb.0: 1018; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1019; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 1020; GFX6-NEXT: s_mov_b32 s6, -1 1021; GFX6-NEXT: s_mov_b32 s7, 0xf000 1022; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1023; GFX6-NEXT: s_and_b32 s0, s0, 0x1c0 1024; GFX6-NEXT: s_lshr_b32 s0, s0, 6 1025; GFX6-NEXT: v_mov_b32_e32 v0, s0 1026; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1027; GFX6-NEXT: s_endpgm 1028 %b = and i32 %a, 448 1029 %c = lshr i32 %b, 6 1030 store i32 %c, i32 addrspace(1)* %out, align 8 1031 ret void 1032} 1033 1034define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { 1035; GFX6-LABEL: and_lshr2: 1036; GFX6: ; %bb.0: 1037; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1038; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 1039; GFX6-NEXT: s_mov_b32 s6, -1 1040; GFX6-NEXT: s_mov_b32 s7, 0xf000 1041; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1042; GFX6-NEXT: s_and_b32 s0, s0, 0x1ff 1043; GFX6-NEXT: s_lshr_b32 s0, s0, 6 1044; GFX6-NEXT: v_mov_b32_e32 v0, s0 1045; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1046; GFX6-NEXT: s_endpgm 1047 %b = and i32 %a, 511 1048 %c = lshr i32 %b, 6 1049 store i32 %c, i32 addrspace(1)* %out, align 8 1050 ret void 1051} 1052 
; shl 9 followed by lshr 11 extracts bits [2, 23) of %a; the pair is matched
; into a single s_bfe_u32 with packed operand 0x150002 (offset 2, width 21).
define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: shl_lshr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x150002
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }