; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s

; Note: despite the "_i32" suffix in the test names below, both functions
; exercise the unsigned variant llvm.amdgcn.ubfe.i32 (the generated CHECK
; lines accordingly match the *_u32 BFE instructions).
define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s1, s1, 63
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

; TODO: Need to expand this.
27; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 { 28; %bfe_i64 = call i32 @llvm.amdgcn.ubfe.i64(i32 %src0, i32 %src1, i32 %src2) 29; ret i64 %bfe_i64 30; } 31 32define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 { 33; GFX6-LABEL: s_bfe_i64_arg_arg_arg: 34; GFX6: ; %bb.0: 35; GFX6-NEXT: s_and_b32 s2, s2, 63 36; GFX6-NEXT: s_lshl_b32 s3, s3, 16 37; GFX6-NEXT: s_or_b32 s2, s2, s3 38; GFX6-NEXT: s_bfe_u64 s[0:1], s[0:1], s2 39; GFX6-NEXT: ; return to shader part epilog 40 %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2) 41 ret i64 %bfe_i32 42} 43 44define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 { 45; GFX6-LABEL: bfe_u32_arg_arg_arg: 46; GFX6: ; %bb.0: 47; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 48; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 49; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 50; GFX6-NEXT: s_mov_b32 s6, -1 51; GFX6-NEXT: s_mov_b32 s7, 0xf000 52; GFX6-NEXT: s_waitcnt lgkmcnt(0) 53; GFX6-NEXT: s_and_b32 s1, s0, 63 54; GFX6-NEXT: s_lshl_b32 s0, s0, 16 55; GFX6-NEXT: s_or_b32 s0, s1, s0 56; GFX6-NEXT: s_bfe_u32 s0, s2, s0 57; GFX6-NEXT: v_mov_b32_e32 v0, s0 58; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 59; GFX6-NEXT: s_endpgm 60 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1) 61 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 62 ret void 63} 64 65define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { 66; GFX6-LABEL: bfe_u32_arg_arg_imm: 67; GFX6: ; %bb.0: 68; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 69; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 70; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 71; GFX6-NEXT: s_mov_b32 s6, -1 72; GFX6-NEXT: s_mov_b32 s7, 0xf000 73; GFX6-NEXT: s_waitcnt lgkmcnt(0) 74; GFX6-NEXT: s_and_b32 s0, s0, 63 75; GFX6-NEXT: s_or_b32 s0, s0, 0x7b0000 76; GFX6-NEXT: s_bfe_u32 s0, s2, s0 77; GFX6-NEXT: 
v_mov_b32_e32 v0, s0 78; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 79; GFX6-NEXT: s_endpgm 80 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123) 81 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 82 ret void 83} 84 85define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { 86; GFX6-LABEL: bfe_u32_arg_imm_arg: 87; GFX6: ; %bb.0: 88; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 89; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 90; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 91; GFX6-NEXT: s_mov_b32 s6, -1 92; GFX6-NEXT: s_mov_b32 s7, 0xf000 93; GFX6-NEXT: s_waitcnt lgkmcnt(0) 94; GFX6-NEXT: s_lshl_b32 s0, s0, 16 95; GFX6-NEXT: s_or_b32 s0, 59, s0 96; GFX6-NEXT: s_bfe_u32 s0, s2, s0 97; GFX6-NEXT: v_mov_b32_e32 v0, s0 98; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 99; GFX6-NEXT: s_endpgm 100 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2) 101 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 102 ret void 103} 104 105define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { 106; GFX6-LABEL: bfe_u32_imm_arg_arg: 107; GFX6: ; %bb.0: 108; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 109; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 110; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 111; GFX6-NEXT: s_mov_b32 s6, -1 112; GFX6-NEXT: s_mov_b32 s7, 0xf000 113; GFX6-NEXT: s_waitcnt lgkmcnt(0) 114; GFX6-NEXT: s_and_b32 s1, s2, 63 115; GFX6-NEXT: s_lshl_b32 s0, s0, 16 116; GFX6-NEXT: s_or_b32 s0, s1, s0 117; GFX6-NEXT: s_bfe_u32 s0, 0x7b, s0 118; GFX6-NEXT: v_mov_b32_e32 v0, s0 119; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 120; GFX6-NEXT: s_endpgm 121 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2) 122 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 123 ret void 124} 125 126define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { 127; GFX6-LABEL: 
bfe_u32_arg_0_width_reg_offset: 128; GFX6: ; %bb.0: 129; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 130; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb 131; GFX6-NEXT: s_load_dword s0, s[0:1], 0xc 132; GFX6-NEXT: s_mov_b32 s6, -1 133; GFX6-NEXT: s_mov_b32 s7, 0xf000 134; GFX6-NEXT: s_waitcnt lgkmcnt(0) 135; GFX6-NEXT: s_and_b32 s0, s0, 63 136; GFX6-NEXT: s_bfe_u32 s0, s2, s0 137; GFX6-NEXT: v_mov_b32_e32 v0, s0 138; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 139; GFX6-NEXT: s_endpgm 140 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0) 141 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 142 ret void 143} 144 145define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { 146; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset: 147; GFX6: ; %bb.0: 148; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 149; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 150; GFX6-NEXT: s_mov_b32 s6, -1 151; GFX6-NEXT: s_mov_b32 s7, 0xf000 152; GFX6-NEXT: s_waitcnt lgkmcnt(0) 153; GFX6-NEXT: s_bfe_u32 s0, s0, 8 154; GFX6-NEXT: v_mov_b32_e32 v0, s0 155; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 156; GFX6-NEXT: s_endpgm 157 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0) 158 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 159 ret void 160} 161 162define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { 163; GFX6-LABEL: bfe_u32_zextload_i8: 164; GFX6: ; %bb.0: 165; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 166; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 167; GFX6-NEXT: s_mov_b32 s2, -1 168; GFX6-NEXT: s_mov_b32 s3, 0xf000 169; GFX6-NEXT: s_mov_b64 s[6:7], s[2:3] 170; GFX6-NEXT: s_waitcnt lgkmcnt(0) 171; GFX6-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 172; GFX6-NEXT: s_waitcnt vmcnt(0) 173; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 8 174; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 175; GFX6-NEXT: s_endpgm 176 %load = load i8, i8 addrspace(1)* %in 177 %ext = 
zext i8 %load to i32 178 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8) 179 store i32 %bfe, i32 addrspace(1)* %out, align 4 180 ret void 181} 182 183; FIXME: Should be using s_add_i32 184define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 185; GFX6-LABEL: bfe_u32_zext_in_reg_i8: 186; GFX6: ; %bb.0: 187; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 188; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 189; GFX6-NEXT: s_mov_b32 s6, -1 190; GFX6-NEXT: s_mov_b32 s7, 0xf000 191; GFX6-NEXT: s_waitcnt lgkmcnt(0) 192; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 193; GFX6-NEXT: s_waitcnt lgkmcnt(0) 194; GFX6-NEXT: s_add_i32 s0, s0, 1 195; GFX6-NEXT: s_and_b32 s0, s0, 0xff 196; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80000 197; GFX6-NEXT: v_mov_b32_e32 v0, s0 198; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 199; GFX6-NEXT: s_endpgm 200 %load = load i32, i32 addrspace(1)* %in, align 4 201 %add = add i32 %load, 1 202 %ext = and i32 %add, 255 203 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8) 204 store i32 %bfe, i32 addrspace(1)* %out, align 4 205 ret void 206} 207 208define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 209; GFX6-LABEL: bfe_u32_zext_in_reg_i16: 210; GFX6: ; %bb.0: 211; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 212; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 213; GFX6-NEXT: s_mov_b32 s6, -1 214; GFX6-NEXT: s_mov_b32 s7, 0xf000 215; GFX6-NEXT: s_waitcnt lgkmcnt(0) 216; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 217; GFX6-NEXT: s_waitcnt lgkmcnt(0) 218; GFX6-NEXT: s_add_i32 s0, s0, 1 219; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 220; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 221; GFX6-NEXT: v_mov_b32_e32 v0, s0 222; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 223; GFX6-NEXT: s_endpgm 224 %load = load i32, i32 addrspace(1)* %in, align 4 225 %add = add i32 %load, 1 226 %ext = and i32 %add, 65535 227 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, 
i32 0, i32 16) 228 store i32 %bfe, i32 addrspace(1)* %out, align 4 229 ret void 230} 231 232define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 233; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1: 234; GFX6: ; %bb.0: 235; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 236; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 237; GFX6-NEXT: s_mov_b32 s6, -1 238; GFX6-NEXT: s_mov_b32 s7, 0xf000 239; GFX6-NEXT: s_waitcnt lgkmcnt(0) 240; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 241; GFX6-NEXT: s_waitcnt lgkmcnt(0) 242; GFX6-NEXT: s_add_i32 s0, s0, 1 243; GFX6-NEXT: s_and_b32 s0, s0, 0xff 244; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80001 245; GFX6-NEXT: v_mov_b32_e32 v0, s0 246; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 247; GFX6-NEXT: s_endpgm 248 %load = load i32, i32 addrspace(1)* %in, align 4 249 %add = add i32 %load, 1 250 %ext = and i32 %add, 255 251 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8) 252 store i32 %bfe, i32 addrspace(1)* %out, align 4 253 ret void 254} 255 256define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 257; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3: 258; GFX6: ; %bb.0: 259; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 260; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 261; GFX6-NEXT: s_mov_b32 s6, -1 262; GFX6-NEXT: s_mov_b32 s7, 0xf000 263; GFX6-NEXT: s_waitcnt lgkmcnt(0) 264; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 265; GFX6-NEXT: s_waitcnt lgkmcnt(0) 266; GFX6-NEXT: s_add_i32 s0, s0, 1 267; GFX6-NEXT: s_and_b32 s0, s0, 0xff 268; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80003 269; GFX6-NEXT: v_mov_b32_e32 v0, s0 270; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 271; GFX6-NEXT: s_endpgm 272 %load = load i32, i32 addrspace(1)* %in, align 4 273 %add = add i32 %load, 1 274 %ext = and i32 %add, 255 275 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8) 276 store i32 %bfe, i32 addrspace(1)* %out, align 4 277 ret void 
278} 279 280define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 281; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7: 282; GFX6: ; %bb.0: 283; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 284; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 285; GFX6-NEXT: s_mov_b32 s6, -1 286; GFX6-NEXT: s_mov_b32 s7, 0xf000 287; GFX6-NEXT: s_waitcnt lgkmcnt(0) 288; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 289; GFX6-NEXT: s_waitcnt lgkmcnt(0) 290; GFX6-NEXT: s_add_i32 s0, s0, 1 291; GFX6-NEXT: s_and_b32 s0, s0, 0xff 292; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80007 293; GFX6-NEXT: v_mov_b32_e32 v0, s0 294; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 295; GFX6-NEXT: s_endpgm 296 %load = load i32, i32 addrspace(1)* %in, align 4 297 %add = add i32 %load, 1 298 %ext = and i32 %add, 255 299 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8) 300 store i32 %bfe, i32 addrspace(1)* %out, align 4 301 ret void 302} 303 304define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 305; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8: 306; GFX6: ; %bb.0: 307; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 308; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 309; GFX6-NEXT: s_mov_b32 s6, -1 310; GFX6-NEXT: s_mov_b32 s7, 0xf000 311; GFX6-NEXT: s_waitcnt lgkmcnt(0) 312; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 313; GFX6-NEXT: s_waitcnt lgkmcnt(0) 314; GFX6-NEXT: s_add_i32 s0, s0, 1 315; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 316; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 317; GFX6-NEXT: v_mov_b32_e32 v0, s0 318; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 319; GFX6-NEXT: s_endpgm 320 %load = load i32, i32 addrspace(1)* %in, align 4 321 %add = add i32 %load, 1 322 %ext = and i32 %add, 65535 323 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8) 324 store i32 %bfe, i32 addrspace(1)* %out, align 4 325 ret void 326} 327 328define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, 
i32 addrspace(1)* %in) #0 { 329; GFX6-LABEL: bfe_u32_test_1: 330; GFX6: ; %bb.0: 331; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 332; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 333; GFX6-NEXT: s_mov_b32 s6, -1 334; GFX6-NEXT: s_mov_b32 s7, 0xf000 335; GFX6-NEXT: s_waitcnt lgkmcnt(0) 336; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 337; GFX6-NEXT: s_waitcnt lgkmcnt(0) 338; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000 339; GFX6-NEXT: v_mov_b32_e32 v0, s0 340; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 341; GFX6-NEXT: s_endpgm 342 %x = load i32, i32 addrspace(1)* %in, align 4 343 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1) 344 store i32 %bfe, i32 addrspace(1)* %out, align 4 345 ret void 346} 347 348define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 349; GFX6-LABEL: bfe_u32_test_2: 350; GFX6: ; %bb.0: 351; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 352; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 353; GFX6-NEXT: s_mov_b32 s6, -1 354; GFX6-NEXT: s_mov_b32 s7, 0xf000 355; GFX6-NEXT: s_waitcnt lgkmcnt(0) 356; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 357; GFX6-NEXT: s_waitcnt lgkmcnt(0) 358; GFX6-NEXT: s_lshl_b32 s0, s0, 31 359; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80000 360; GFX6-NEXT: v_mov_b32_e32 v0, s0 361; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 362; GFX6-NEXT: s_endpgm 363 %x = load i32, i32 addrspace(1)* %in, align 4 364 %shl = shl i32 %x, 31 365 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8) 366 store i32 %bfe, i32 addrspace(1)* %out, align 4 367 ret void 368} 369 370define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 371; GFX6-LABEL: bfe_u32_test_3: 372; GFX6: ; %bb.0: 373; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 374; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 375; GFX6-NEXT: s_mov_b32 s6, -1 376; GFX6-NEXT: s_mov_b32 s7, 0xf000 377; GFX6-NEXT: s_waitcnt lgkmcnt(0) 378; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 379; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) 380; GFX6-NEXT: s_lshl_b32 s0, s0, 31 381; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000 382; GFX6-NEXT: v_mov_b32_e32 v0, s0 383; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 384; GFX6-NEXT: s_endpgm 385 %x = load i32, i32 addrspace(1)* %in, align 4 386 %shl = shl i32 %x, 31 387 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1) 388 store i32 %bfe, i32 addrspace(1)* %out, align 4 389 ret void 390} 391 392define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 393; GFX6-LABEL: bfe_u32_test_4: 394; GFX6: ; %bb.0: 395; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 396; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 397; GFX6-NEXT: s_mov_b32 s6, -1 398; GFX6-NEXT: s_mov_b32 s7, 0xf000 399; GFX6-NEXT: s_waitcnt lgkmcnt(0) 400; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 401; GFX6-NEXT: s_waitcnt lgkmcnt(0) 402; GFX6-NEXT: s_lshl_b32 s0, s0, 31 403; GFX6-NEXT: s_lshr_b32 s0, s0, 31 404; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f 405; GFX6-NEXT: v_mov_b32_e32 v0, s0 406; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 407; GFX6-NEXT: s_endpgm 408 %x = load i32, i32 addrspace(1)* %in, align 4 409 %shl = shl i32 %x, 31 410 %shr = lshr i32 %shl, 31 411 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1) 412 store i32 %bfe, i32 addrspace(1)* %out, align 4 413 ret void 414} 415 416define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 417; GFX6-LABEL: bfe_u32_test_5: 418; GFX6: ; %bb.0: 419; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 420; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 421; GFX6-NEXT: s_mov_b32 s6, -1 422; GFX6-NEXT: s_mov_b32 s7, 0xf000 423; GFX6-NEXT: s_waitcnt lgkmcnt(0) 424; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 425; GFX6-NEXT: s_waitcnt lgkmcnt(0) 426; GFX6-NEXT: s_lshl_b32 s0, s0, 31 427; GFX6-NEXT: s_ashr_i32 s0, s0, 31 428; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000 429; GFX6-NEXT: v_mov_b32_e32 v0, s0 430; GFX6-NEXT: buffer_store_dword v0, off, 
s[4:7], 0 431; GFX6-NEXT: s_endpgm 432 %x = load i32, i32 addrspace(1)* %in, align 4 433 %shl = shl i32 %x, 31 434 %shr = ashr i32 %shl, 31 435 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1) 436 store i32 %bfe, i32 addrspace(1)* %out, align 4 437 ret void 438} 439 440define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 441; GFX6-LABEL: bfe_u32_test_6: 442; GFX6: ; %bb.0: 443; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 444; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 445; GFX6-NEXT: s_mov_b32 s6, -1 446; GFX6-NEXT: s_mov_b32 s7, 0xf000 447; GFX6-NEXT: s_waitcnt lgkmcnt(0) 448; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 449; GFX6-NEXT: s_waitcnt lgkmcnt(0) 450; GFX6-NEXT: s_lshl_b32 s0, s0, 31 451; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1f0001 452; GFX6-NEXT: v_mov_b32_e32 v0, s0 453; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 454; GFX6-NEXT: s_endpgm 455 %x = load i32, i32 addrspace(1)* %in, align 4 456 %shl = shl i32 %x, 31 457 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31) 458 store i32 %bfe, i32 addrspace(1)* %out, align 4 459 ret void 460} 461 462define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 463; GFX6-LABEL: bfe_u32_test_7: 464; GFX6: ; %bb.0: 465; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 466; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 467; GFX6-NEXT: s_mov_b32 s6, -1 468; GFX6-NEXT: s_mov_b32 s7, 0xf000 469; GFX6-NEXT: s_waitcnt lgkmcnt(0) 470; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 471; GFX6-NEXT: s_waitcnt lgkmcnt(0) 472; GFX6-NEXT: s_lshl_b32 s0, s0, 31 473; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1f0000 474; GFX6-NEXT: v_mov_b32_e32 v0, s0 475; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 476; GFX6-NEXT: s_endpgm 477 %x = load i32, i32 addrspace(1)* %in, align 4 478 %shl = shl i32 %x, 31 479 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31) 480 store i32 %bfe, i32 addrspace(1)* %out, align 4 481 ret void 482} 483 484define 
amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 485; GFX6-LABEL: bfe_u32_test_8: 486; GFX6: ; %bb.0: 487; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 488; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 489; GFX6-NEXT: s_mov_b32 s6, -1 490; GFX6-NEXT: s_mov_b32 s7, 0xf000 491; GFX6-NEXT: s_waitcnt lgkmcnt(0) 492; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 493; GFX6-NEXT: s_waitcnt lgkmcnt(0) 494; GFX6-NEXT: s_lshl_b32 s0, s0, 31 495; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f 496; GFX6-NEXT: v_mov_b32_e32 v0, s0 497; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 498; GFX6-NEXT: s_endpgm 499 %x = load i32, i32 addrspace(1)* %in, align 4 500 %shl = shl i32 %x, 31 501 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) 502 store i32 %bfe, i32 addrspace(1)* %out, align 4 503 ret void 504} 505 506define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 507; GFX6-LABEL: bfe_u32_test_9: 508; GFX6: ; %bb.0: 509; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 510; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 511; GFX6-NEXT: s_mov_b32 s6, -1 512; GFX6-NEXT: s_mov_b32 s7, 0xf000 513; GFX6-NEXT: s_waitcnt lgkmcnt(0) 514; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 515; GFX6-NEXT: s_waitcnt lgkmcnt(0) 516; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f 517; GFX6-NEXT: v_mov_b32_e32 v0, s0 518; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 519; GFX6-NEXT: s_endpgm 520 %x = load i32, i32 addrspace(1)* %in, align 4 521 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1) 522 store i32 %bfe, i32 addrspace(1)* %out, align 4 523 ret void 524} 525 526define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 527; GFX6-LABEL: bfe_u32_test_10: 528; GFX6: ; %bb.0: 529; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 530; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 531; GFX6-NEXT: s_mov_b32 s6, -1 532; GFX6-NEXT: s_mov_b32 s7, 0xf000 533; GFX6-NEXT: s_waitcnt lgkmcnt(0) 534; 
GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 535; GFX6-NEXT: s_waitcnt lgkmcnt(0) 536; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1f0001 537; GFX6-NEXT: v_mov_b32_e32 v0, s0 538; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 539; GFX6-NEXT: s_endpgm 540 %x = load i32, i32 addrspace(1)* %in, align 4 541 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31) 542 store i32 %bfe, i32 addrspace(1)* %out, align 4 543 ret void 544} 545 546define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 547; GFX6-LABEL: bfe_u32_test_11: 548; GFX6: ; %bb.0: 549; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 550; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 551; GFX6-NEXT: s_mov_b32 s6, -1 552; GFX6-NEXT: s_mov_b32 s7, 0xf000 553; GFX6-NEXT: s_waitcnt lgkmcnt(0) 554; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 555; GFX6-NEXT: s_waitcnt lgkmcnt(0) 556; GFX6-NEXT: s_bfe_u32 s0, s0, 0x180008 557; GFX6-NEXT: v_mov_b32_e32 v0, s0 558; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 559; GFX6-NEXT: s_endpgm 560 %x = load i32, i32 addrspace(1)* %in, align 4 561 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24) 562 store i32 %bfe, i32 addrspace(1)* %out, align 4 563 ret void 564} 565 566define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 567; GFX6-LABEL: bfe_u32_test_12: 568; GFX6: ; %bb.0: 569; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 570; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 571; GFX6-NEXT: s_mov_b32 s6, -1 572; GFX6-NEXT: s_mov_b32 s7, 0xf000 573; GFX6-NEXT: s_waitcnt lgkmcnt(0) 574; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 575; GFX6-NEXT: s_waitcnt lgkmcnt(0) 576; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80018 577; GFX6-NEXT: v_mov_b32_e32 v0, s0 578; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 579; GFX6-NEXT: s_endpgm 580 %x = load i32, i32 addrspace(1)* %in, align 4 581 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8) 582 store i32 %bfe, i32 addrspace(1)* %out, align 4 583 ret void 
584} 585 586; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}} 587define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 588; GFX6-LABEL: bfe_u32_test_13: 589; GFX6: ; %bb.0: 590; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 591; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 592; GFX6-NEXT: s_mov_b32 s6, -1 593; GFX6-NEXT: s_mov_b32 s7, 0xf000 594; GFX6-NEXT: s_waitcnt lgkmcnt(0) 595; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 596; GFX6-NEXT: s_waitcnt lgkmcnt(0) 597; GFX6-NEXT: s_ashr_i32 s0, s0, 31 598; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f 599; GFX6-NEXT: v_mov_b32_e32 v0, s0 600; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 601; GFX6-NEXT: s_endpgm 602 %x = load i32, i32 addrspace(1)* %in, align 4 603 %shl = ashr i32 %x, 31 604 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) 605 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void 606} 607 608define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 609; GFX6-LABEL: bfe_u32_test_14: 610; GFX6: ; %bb.0: 611; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 612; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 613; GFX6-NEXT: s_mov_b32 s6, -1 614; GFX6-NEXT: s_mov_b32 s7, 0xf000 615; GFX6-NEXT: s_waitcnt lgkmcnt(0) 616; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 617; GFX6-NEXT: s_waitcnt lgkmcnt(0) 618; GFX6-NEXT: s_lshr_b32 s0, s0, 31 619; GFX6-NEXT: s_bfe_u32 s0, s0, 0x1001f 620; GFX6-NEXT: v_mov_b32_e32 v0, s0 621; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 622; GFX6-NEXT: s_endpgm 623 %x = load i32, i32 addrspace(1)* %in, align 4 624 %shl = lshr i32 %x, 31 625 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) 626 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void 627} 628 629define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { 630; GFX6-LABEL: bfe_u32_constant_fold_test_0: 631; GFX6: ; %bb.0: 632; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 633; GFX6-NEXT: s_bfe_u32 
s2, 0, 0 634; GFX6-NEXT: v_mov_b32_e32 v0, s2 635; GFX6-NEXT: s_mov_b32 s2, -1 636; GFX6-NEXT: s_mov_b32 s3, 0xf000 637; GFX6-NEXT: s_waitcnt lgkmcnt(0) 638; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 639; GFX6-NEXT: s_endpgm 640 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0) 641 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 642 ret void 643} 644 645define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { 646; GFX6-LABEL: bfe_u32_constant_fold_test_1: 647; GFX6: ; %bb.0: 648; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 649; GFX6-NEXT: s_bfe_u32 s2, 0x302e, 0 650; GFX6-NEXT: v_mov_b32_e32 v0, s2 651; GFX6-NEXT: s_mov_b32 s2, -1 652; GFX6-NEXT: s_mov_b32 s3, 0xf000 653; GFX6-NEXT: s_waitcnt lgkmcnt(0) 654; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 655; GFX6-NEXT: s_endpgm 656 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0) 657 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 658 ret void 659} 660 661define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { 662; GFX6-LABEL: bfe_u32_constant_fold_test_2: 663; GFX6: ; %bb.0: 664; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 665; GFX6-NEXT: s_bfe_u32 s2, 0, 0x10000 666; GFX6-NEXT: v_mov_b32_e32 v0, s2 667; GFX6-NEXT: s_mov_b32 s2, -1 668; GFX6-NEXT: s_mov_b32 s3, 0xf000 669; GFX6-NEXT: s_waitcnt lgkmcnt(0) 670; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 671; GFX6-NEXT: s_endpgm 672 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1) 673 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 674 ret void 675} 676 677define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { 678; GFX6-LABEL: bfe_u32_constant_fold_test_3: 679; GFX6: ; %bb.0: 680; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 681; GFX6-NEXT: s_bfe_u32 s2, 1, 0x10000 682; GFX6-NEXT: v_mov_b32_e32 v0, s2 683; GFX6-NEXT: s_mov_b32 s2, -1 684; GFX6-NEXT: s_mov_b32 s3, 0xf000 685; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) 686; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 687; GFX6-NEXT: s_endpgm 688 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1) 689 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 690 ret void 691} 692 693define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { 694; GFX6-LABEL: bfe_u32_constant_fold_test_4: 695; GFX6: ; %bb.0: 696; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 697; GFX6-NEXT: s_bfe_u32 s3, -1, 0x10000 698; GFX6-NEXT: v_mov_b32_e32 v0, s3 699; GFX6-NEXT: s_mov_b32 s2, -1 700; GFX6-NEXT: s_mov_b32 s3, 0xf000 701; GFX6-NEXT: s_waitcnt lgkmcnt(0) 702; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 703; GFX6-NEXT: s_endpgm 704 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1) 705 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 706 ret void 707} 708 709define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { 710; GFX6-LABEL: bfe_u32_constant_fold_test_5: 711; GFX6: ; %bb.0: 712; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 713; GFX6-NEXT: s_mov_b32 s2, 0x10007 714; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2 715; GFX6-NEXT: v_mov_b32_e32 v0, s2 716; GFX6-NEXT: s_mov_b32 s2, -1 717; GFX6-NEXT: s_mov_b32 s3, 0xf000 718; GFX6-NEXT: s_waitcnt lgkmcnt(0) 719; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 720; GFX6-NEXT: s_endpgm 721 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1) 722 store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 723 ret void 724} 725 726define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { 727; GFX6-LABEL: bfe_u32_constant_fold_test_6: 728; GFX6: ; %bb.0: 729; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 730; GFX6-NEXT: s_mov_b32 s2, 0x80000 731; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2 732; GFX6-NEXT: v_mov_b32_e32 v0, s2 733; GFX6-NEXT: s_mov_b32 s2, -1 734; GFX6-NEXT: s_mov_b32 s3, 0xf000 735; GFX6-NEXT: s_waitcnt lgkmcnt(0) 736; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 737; 
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; The constant_fold tests below call ubfe with all-constant operands. The
; autogenerated checks show GlobalISel currently still materializes the packed
; (width << 16) | offset operand and emits s_bfe_u32 rather than folding the
; extract to an immediate.
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80006
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80010
; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x40004
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; offset 31, width 1: extracts only the top bit.
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; offset + width = 32: the extract covers everything above the offset.
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_15:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Both s_bfe_u32 operands fit as inline/literal constants here, so no s_mov
; of the packed offset/width value is needed.
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s3, -1, 0x70001
; GFX6-NEXT:    v_mov_b32_e32 v0, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_17:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_18:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_bfe_u32 s1, s0, 0x20002
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v1, off, s[4:7], 0
; GFX6-NEXT:    buffer_store_dword v0, off, s[8:11], 0
; GFX6-NEXT:    s_endpgm
                                                          i32 addrspace(1)* %out1,
                                                          i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; The shift/and patterns below could be matched to a bitfield extract; the
; checks show GlobalISel currently leaves them as separate shift and and/shift
; instructions.
define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: lshr_and:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
; GFX6-NEXT:    s_and_b32 s0, s0, 7
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; GFX6-LABEL: v_lshr_and:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s0, s2, s0
; GFX6-NEXT:    s_and_b32 s0, s0, 7
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: and_lshr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 0x1c0
; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: and_lshr2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 0x1ff
; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: shl_lshr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 9
; GFX6-NEXT:    s_lshr_b32 s0, s0, 11
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; Unsigned bitfield extract intrinsics under test.
declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }