; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s

; Test end to end matching of addressing modes when MUBUF is used for
; global memory.
;
; Naming used below: "sgpr_ptr" functions take the pointer as an inreg
; argument, "vgpr_ptr" functions take it as a plain argument; "offsetN" is a
; constant element index N (elements are 4 bytes, so the byte offset is 4 * N);
; "sgpr_offset" / "vgpr_offset" is a variable i32 index passed inreg / plain.

;------------------------------------------------------------------------------
; Stores
;------------------------------------------------------------------------------

; No offset: pointer expected in s[0:1], store uses `off` and a 0 scalar operand.
define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_mov_b32_e32 v0, 0
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
  store i32 0, i32 addrspace(1)* %ptr
  ret void
}

; Index 4095 (byte offset 0x3ffc): expected in s4 as the store's last scalar operand.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], s4
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_mov_b32_e32 v0, 0
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s4
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Index 2^32 (byte offset 2^34): expected as s[4:5] = {0, 4} copied into
; v[0:1], with an addr64 store.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    s_mov_b32 s5, 4
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s4, 0
; GFX7-NEXT:    s_mov_b32 s5, 4
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Index 2^32 + 1 (byte offset 2^34 + 4): expected as s[4:5] = {4, 4} copied
; into v[0:1], with an addr64 store.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s4, 4
; GFX6-NEXT:    s_mov_b32 s5, s4
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s4, 4
; GFX7-NEXT:    s_mov_b32 s5, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Index 4096 (byte offset 0x4000): expected in s4 as the store's last scalar operand.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x4000
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], s4
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4096:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_mov_b32_e32 v0, 0
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x4000
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s4
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Plain (VGPR) pointer, index 4095: pointer stays in v[0:1] (addr64), s[0:1]
; are zeroed and 0x3ffc is expected in s4.
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[0:1], 0
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4095:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b64 s[0:1], 0
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Plain (VGPR) pointer, index 2^32: byte offset expected as s[0:1] = {0, 4},
; pointer stays in v[0:1].
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, 0
; GFX6-NEXT:    s_mov_b32 s1, 4
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, s0
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, 0
; GFX7-NEXT:    s_mov_b32 s1, 4
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, s0
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Plain (VGPR) pointer, index 2^32 + 1: byte offset expected as
; s[0:1] = {4, 4}, pointer stays in v[0:1].
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, 4
; GFX6-NEXT:    s_mov_b32 s1, s0
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, 4
; GFX7-NEXT:    s_mov_b32 s1, s0
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Plain (VGPR) pointer, index 4096: s[0:1] zeroed, 0x4000 expected in s4.
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[0:1], 0
; GFX6-NEXT:    s_movk_i32 s4, 0x4000
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4096:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b64 s[0:1], 0
; GFX7-NEXT:    s_movk_i32 s4, 0x4000
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; inreg pointer + inreg i32 index: index is widened with s_bfe_i64, shifted
; left by 2 and copied into v[0:1] for an addr64 store.
define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; Plain pointer + inreg i32 index: scaled index expected in s[0:1], pointer
; stays in v[0:1].
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; (ptr + soffset) + 256: the constant part (256 * 4) is expected as the
; immediate `offset:1024`.
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX7-NEXT:    s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

; (ptr + 256) + soffset: the two GEPs are applied in the opposite order; the
; expected code still uses the immediate `offset:1024`.
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX7-NEXT:    s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

; inreg pointer + plain (VGPR) i32 index: index is widened with
; v_ashrrev_i32 and scaled with v_lshl_b64 into v[0:1]; pointer goes to s[0:1].
define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; (ptr + voffset) + 4095: the constant part (0x3ffc) is expected in s4.
define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT:    s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 4095
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

; (ptr + 4095) + voffset: GEPs in the opposite order; the expected code still
; places 0x3ffc in s4.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT:    s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

;------------------------------------------------------------------------------
; Loads (all volatile; every expected buffer_load_dword carries glc)
;------------------------------------------------------------------------------

; No offset: pointer expected in s[0:1], load uses `off` and a 0 scalar operand.
define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %val = load volatile float, float addrspace(1)* %ptr
  ret float %val
}

; Index 4095 (byte offset 0x3ffc): expected in s4 as the load's last scalar operand.
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Index 2^32 (byte offset 2^34): expected as s[4:5] = {0, 4} copied into
; v[0:1], with an addr64 load.
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    s_mov_b32 s5, 4
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s4, 0
; GFX7-NEXT:    s_mov_b32 s5, 4
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Index 2^32 + 1 (byte offset 2^34 + 4): expected as s[4:5] = {4, 4} copied
; into v[0:1], with an addr64 load.
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s4, 4
; GFX6-NEXT:    s_mov_b32 s5, s4
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s4, 4
; GFX7-NEXT:    s_mov_b32 s5, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Index 4096 (byte offset 0x4000): expected in s4 as the load's last scalar operand.
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x4000
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4096:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x4000
; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s4 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Plain (VGPR) pointer, index 4095: pointer stays in v[0:1] (addr64), s[0:1]
; are zeroed and 0x3ffc is expected in s4.
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[0:1], 0
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4095:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b64 s[0:1], 0
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Plain (VGPR) pointer, index 2^32: byte offset expected as s[0:1] = {0, 4},
; pointer stays in v[0:1].
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, 0
; GFX6-NEXT:    s_mov_b32 s1, 4
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, s0
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, 0
; GFX7-NEXT:    s_mov_b32 s1, 4
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, s0
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Plain (VGPR) pointer, index 2^32 + 1: byte offset expected as
; s[0:1] = {4, 4}, pointer stays in v[0:1].
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, 4
; GFX6-NEXT:    s_mov_b32 s1, s0
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, 4
; GFX7-NEXT:    s_mov_b32 s1, s0
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Plain (VGPR) pointer, index 4096: s[0:1] zeroed, 0x4000 expected in s4.
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[0:1], 0
; GFX6-NEXT:    s_movk_i32 s4, 0x4000
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4096:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b64 s[0:1], 0
; GFX7-NEXT:    s_movk_i32 s4, 0x4000
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; inreg pointer + inreg i32 index: index is widened with s_bfe_i64, shifted
; left by 2 and copied into v[0:1] for an addr64 load.
define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[4:5], s[2:3], 2
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Plain pointer + inreg i32 index: scaled index expected in s[0:1], pointer
; stays in v[0:1].
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; (ptr + soffset) + 256: the constant part (256 * 4) is expected as the
; immediate `offset:1024`.
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

; (ptr + 256) + soffset: GEPs in the opposite order; the expected code still
; uses the immediate `offset:1024`.
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT:    s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

; inreg pointer + plain (VGPR) i32 index: index is widened with
; v_ashrrev_i32 and scaled with v_lshl_b64 into v[0:1]; pointer goes to s[0:1].
define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; (ptr + voffset) + 4095: the constant part (0x3ffc) is expected in s4.
define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

; (ptr + 4095) + voffset: GEPs in the opposite order; the expected code still
; places 0x3ffc in s4.
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %voffset
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

;------------------------------------------------------------------------------
; Atomics (seq_cst atomicrmw add of 2; the old value is returned as float).
; In the two complete functions below, the GFX6 expectations end with an extra
; s_waitcnt expcnt(0) that the GFX7 expectations do not have.
;------------------------------------------------------------------------------

; Index 4095 (byte offset 0x3ffc): expected in s4 as the atomic's last scalar operand.
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    v_mov_b32_e32 v0, 2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_add v0, off, s[0:3], s4 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_mov_b32_e32 v0, 2
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_add v0, off, s[0:3], s4 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
  %cast = bitcast i32 %result to float
  ret float %cast
}

; Index 2^32 (byte offset 2^34): expected as s[4:5] = {0, 4} copied into
; v[1:2], with an addr64 atomic.
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    s_mov_b32 s5, 4
; GFX6-NEXT:    v_mov_b32_e32 v1, s4
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    v_mov_b32_e32 v0, 2
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    v_mov_b32_e32 v2, s5
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_waitcnt expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s4, 0
; GFX7-NEXT:    s_mov_b32 s5, 4
; GFX7-NEXT:    v_mov_b32_e32 v1, s4
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_mov_b32_e32 v0, 2
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, s4
; GFX7-NEXT:    v_mov_b32_e32 v2, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1
; GFX7-NEXT:    ; return to shader part epilog
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
  %cast = bitcast i32 %result to float
  ret float %cast
}

define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 2
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[0:1], 0
; GFX6-NEXT:    s_movk_i32 s4, 0x3ffc
; GFX6-NEXT:    s_waitcnt
%result to float 932 ret float %cast 933} 934 935define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) { 936; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296: 937; GFX6: ; %bb.0: 938; GFX6-NEXT: s_mov_b32 s4, 0 939; GFX6-NEXT: s_mov_b32 s5, 4 940; GFX6-NEXT: v_mov_b32_e32 v1, s4 941; GFX6-NEXT: s_mov_b32 s0, s2 942; GFX6-NEXT: s_mov_b32 s1, s3 943; GFX6-NEXT: v_mov_b32_e32 v0, 2 944; GFX6-NEXT: s_mov_b32 s3, 0xf000 945; GFX6-NEXT: s_mov_b32 s2, s4 946; GFX6-NEXT: v_mov_b32_e32 v2, s5 947; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 948; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc 949; GFX6-NEXT: s_waitcnt vmcnt(0) 950; GFX6-NEXT: buffer_wbinvl1 951; GFX6-NEXT: s_waitcnt expcnt(0) 952; GFX6-NEXT: ; return to shader part epilog 953; 954; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296: 955; GFX7: ; %bb.0: 956; GFX7-NEXT: s_mov_b32 s4, 0 957; GFX7-NEXT: s_mov_b32 s5, 4 958; GFX7-NEXT: v_mov_b32_e32 v1, s4 959; GFX7-NEXT: s_mov_b32 s0, s2 960; GFX7-NEXT: s_mov_b32 s1, s3 961; GFX7-NEXT: v_mov_b32_e32 v0, 2 962; GFX7-NEXT: s_mov_b32 s3, 0xf000 963; GFX7-NEXT: s_mov_b32 s2, s4 964; GFX7-NEXT: v_mov_b32_e32 v2, s5 965; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 966; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc 967; GFX7-NEXT: s_waitcnt vmcnt(0) 968; GFX7-NEXT: buffer_wbinvl1 969; GFX7-NEXT: ; return to shader part epilog 970 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 971 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 972 %cast = bitcast i32 %result to float 973 ret float %cast 974} 975 976define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { 977; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095: 978; GFX6: ; %bb.0: 979; GFX6-NEXT: v_mov_b32_e32 v2, 2 980; GFX6-NEXT: s_mov_b32 s2, 0 981; GFX6-NEXT: s_mov_b32 s3, 0xf000 982; GFX6-NEXT: s_mov_b64 s[0:1], 0 983; GFX6-NEXT: s_movk_i32 s4, 0x3ffc 984; GFX6-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 985; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc 986; GFX6-NEXT: s_waitcnt vmcnt(0) 987; GFX6-NEXT: buffer_wbinvl1 988; GFX6-NEXT: v_mov_b32_e32 v0, v2 989; GFX6-NEXT: s_waitcnt expcnt(0) 990; GFX6-NEXT: ; return to shader part epilog 991; 992; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095: 993; GFX7: ; %bb.0: 994; GFX7-NEXT: v_mov_b32_e32 v2, 2 995; GFX7-NEXT: s_mov_b32 s2, 0 996; GFX7-NEXT: s_mov_b32 s3, 0xf000 997; GFX7-NEXT: s_mov_b64 s[0:1], 0 998; GFX7-NEXT: s_movk_i32 s4, 0x3ffc 999; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1000; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc 1001; GFX7-NEXT: s_waitcnt vmcnt(0) 1002; GFX7-NEXT: buffer_wbinvl1 1003; GFX7-NEXT: v_mov_b32_e32 v0, v2 1004; GFX7-NEXT: ; return to shader part epilog 1005 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 1006 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 1007 %cast = bitcast i32 %result to float 1008 ret float %cast 1009} 1010 1011define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { 1012; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296: 1013; GFX6: ; %bb.0: 1014; GFX6-NEXT: s_mov_b32 s0, 0 1015; GFX6-NEXT: s_mov_b32 s1, 4 1016; GFX6-NEXT: v_mov_b32_e32 v2, 2 1017; GFX6-NEXT: s_mov_b32 s3, 0xf000 1018; GFX6-NEXT: s_mov_b32 s2, s0 1019; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1020; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1021; GFX6-NEXT: s_waitcnt vmcnt(0) 1022; GFX6-NEXT: buffer_wbinvl1 1023; GFX6-NEXT: v_mov_b32_e32 v0, v2 1024; GFX6-NEXT: s_waitcnt expcnt(0) 1025; GFX6-NEXT: ; return to shader part epilog 1026; 1027; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296: 1028; GFX7: ; %bb.0: 1029; GFX7-NEXT: s_mov_b32 s0, 0 1030; GFX7-NEXT: s_mov_b32 s1, 4 1031; GFX7-NEXT: v_mov_b32_e32 v2, 2 1032; GFX7-NEXT: s_mov_b32 s3, 0xf000 1033; GFX7-NEXT: s_mov_b32 s2, s0 1034; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1035; GFX7-NEXT: 
buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1036; GFX7-NEXT: s_waitcnt vmcnt(0) 1037; GFX7-NEXT: buffer_wbinvl1 1038; GFX7-NEXT: v_mov_b32_e32 v0, v2 1039; GFX7-NEXT: ; return to shader part epilog 1040 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 1041 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 1042 %cast = bitcast i32 %result to float 1043 ret float %cast 1044} 1045 1046define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { 1047; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset: 1048; GFX6: ; %bb.0: 1049; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1050; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1051; GFX6-NEXT: s_mov_b32 s0, s2 1052; GFX6-NEXT: s_mov_b32 s1, s3 1053; GFX6-NEXT: v_mov_b32_e32 v2, 2 1054; GFX6-NEXT: s_mov_b32 s2, 0 1055; GFX6-NEXT: s_mov_b32 s3, 0xf000 1056; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1057; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1058; GFX6-NEXT: s_waitcnt vmcnt(0) 1059; GFX6-NEXT: buffer_wbinvl1 1060; GFX6-NEXT: v_mov_b32_e32 v0, v2 1061; GFX6-NEXT: s_waitcnt expcnt(0) 1062; GFX6-NEXT: ; return to shader part epilog 1063; 1064; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset: 1065; GFX7: ; %bb.0: 1066; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1067; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1068; GFX7-NEXT: s_mov_b32 s0, s2 1069; GFX7-NEXT: s_mov_b32 s1, s3 1070; GFX7-NEXT: v_mov_b32_e32 v2, 2 1071; GFX7-NEXT: s_mov_b32 s2, 0 1072; GFX7-NEXT: s_mov_b32 s3, 0xf000 1073; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1074; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1075; GFX7-NEXT: s_waitcnt vmcnt(0) 1076; GFX7-NEXT: buffer_wbinvl1 1077; GFX7-NEXT: v_mov_b32_e32 v0, v2 1078; GFX7-NEXT: ; return to shader part epilog 1079 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset 1080 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 1081 %cast = bitcast i32 %result to float 1082 ret float %cast 
1083} 1084 1085define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { 1086; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095: 1087; GFX6: ; %bb.0: 1088; GFX6-NEXT: s_mov_b32 s0, s2 1089; GFX6-NEXT: s_mov_b32 s1, s3 1090; GFX6-NEXT: v_mov_b32_e32 v2, v0 1091; GFX6-NEXT: s_mov_b32 s2, -1 1092; GFX6-NEXT: s_mov_b32 s3, 0xf000 1093; GFX6-NEXT: s_movk_i32 s4, 0x3ffc 1094; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1095; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc 1096; GFX6-NEXT: s_waitcnt vmcnt(0) 1097; GFX6-NEXT: buffer_wbinvl1 1098; GFX6-NEXT: v_mov_b32_e32 v0, v1 1099; GFX6-NEXT: s_waitcnt expcnt(0) 1100; GFX6-NEXT: ; return to shader part epilog 1101; 1102; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095: 1103; GFX7: ; %bb.0: 1104; GFX7-NEXT: s_mov_b32 s0, s2 1105; GFX7-NEXT: s_mov_b32 s1, s3 1106; GFX7-NEXT: v_mov_b32_e32 v2, v0 1107; GFX7-NEXT: s_mov_b32 s2, -1 1108; GFX7-NEXT: s_mov_b32 s3, 0xf000 1109; GFX7-NEXT: s_movk_i32 s4, 0x3ffc 1110; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1111; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc 1112; GFX7-NEXT: s_waitcnt vmcnt(0) 1113; GFX7-NEXT: buffer_wbinvl1 1114; GFX7-NEXT: v_mov_b32_e32 v0, v1 1115; GFX7-NEXT: ; return to shader part epilog 1116 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 1117 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1118 %result = extractvalue { i32, i1 } %result.struct, 0 1119 %cast = bitcast i32 %result to float 1120 ret float %cast 1121} 1122 1123define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { 1124; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296: 1125; GFX6: ; %bb.0: 1126; GFX6-NEXT: s_mov_b32 s4, 0 1127; GFX6-NEXT: s_mov_b32 s5, 4 1128; GFX6-NEXT: v_mov_b32_e32 v3, s4 1129; GFX6-NEXT: s_mov_b32 s0, s2 1130; GFX6-NEXT: s_mov_b32 s1, s3 1131; GFX6-NEXT: v_mov_b32_e32 v2, v0 1132; GFX6-NEXT: 
s_mov_b32 s3, 0xf000 1133; GFX6-NEXT: s_mov_b32 s2, s4 1134; GFX6-NEXT: v_mov_b32_e32 v4, s5 1135; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1136; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc 1137; GFX6-NEXT: s_waitcnt vmcnt(0) 1138; GFX6-NEXT: buffer_wbinvl1 1139; GFX6-NEXT: v_mov_b32_e32 v0, v1 1140; GFX6-NEXT: s_waitcnt expcnt(0) 1141; GFX6-NEXT: ; return to shader part epilog 1142; 1143; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296: 1144; GFX7: ; %bb.0: 1145; GFX7-NEXT: s_mov_b32 s4, 0 1146; GFX7-NEXT: s_mov_b32 s5, 4 1147; GFX7-NEXT: v_mov_b32_e32 v3, s4 1148; GFX7-NEXT: s_mov_b32 s0, s2 1149; GFX7-NEXT: s_mov_b32 s1, s3 1150; GFX7-NEXT: v_mov_b32_e32 v2, v0 1151; GFX7-NEXT: s_mov_b32 s3, 0xf000 1152; GFX7-NEXT: s_mov_b32 s2, s4 1153; GFX7-NEXT: v_mov_b32_e32 v4, s5 1154; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1155; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc 1156; GFX7-NEXT: s_waitcnt vmcnt(0) 1157; GFX7-NEXT: buffer_wbinvl1 1158; GFX7-NEXT: v_mov_b32_e32 v0, v1 1159; GFX7-NEXT: ; return to shader part epilog 1160 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 1161 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1162 %result = extractvalue { i32, i1 } %result.struct, 0 1163 %cast = bitcast i32 %result to float 1164 ret float %cast 1165} 1166 1167define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { 1168; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095: 1169; GFX6: ; %bb.0: 1170; GFX6-NEXT: v_mov_b32_e32 v4, v2 1171; GFX6-NEXT: s_mov_b32 s2, 0 1172; GFX6-NEXT: s_mov_b32 s3, 0xf000 1173; GFX6-NEXT: s_mov_b64 s[0:1], 0 1174; GFX6-NEXT: s_movk_i32 s4, 0x3ffc 1175; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1176; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc 1177; GFX6-NEXT: s_waitcnt vmcnt(0) 1178; GFX6-NEXT: buffer_wbinvl1 1179; GFX6-NEXT: v_mov_b32_e32 v0, v3 1180; GFX6-NEXT: 
s_waitcnt expcnt(0) 1181; GFX6-NEXT: ; return to shader part epilog 1182; 1183; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095: 1184; GFX7: ; %bb.0: 1185; GFX7-NEXT: v_mov_b32_e32 v4, v2 1186; GFX7-NEXT: s_mov_b32 s2, 0 1187; GFX7-NEXT: s_mov_b32 s3, 0xf000 1188; GFX7-NEXT: s_mov_b64 s[0:1], 0 1189; GFX7-NEXT: s_movk_i32 s4, 0x3ffc 1190; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1191; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc 1192; GFX7-NEXT: s_waitcnt vmcnt(0) 1193; GFX7-NEXT: buffer_wbinvl1 1194; GFX7-NEXT: v_mov_b32_e32 v0, v3 1195; GFX7-NEXT: ; return to shader part epilog 1196 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 1197 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1198 %result = extractvalue { i32, i1 } %result.struct, 0 1199 %cast = bitcast i32 %result to float 1200 ret float %cast 1201} 1202 1203define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { 1204; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296: 1205; GFX6: ; %bb.0: 1206; GFX6-NEXT: s_mov_b32 s0, 0 1207; GFX6-NEXT: v_mov_b32_e32 v4, v2 1208; GFX6-NEXT: s_mov_b32 s1, 4 1209; GFX6-NEXT: s_mov_b32 s3, 0xf000 1210; GFX6-NEXT: s_mov_b32 s2, s0 1211; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1212; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc 1213; GFX6-NEXT: s_waitcnt vmcnt(0) 1214; GFX6-NEXT: buffer_wbinvl1 1215; GFX6-NEXT: v_mov_b32_e32 v0, v3 1216; GFX6-NEXT: s_waitcnt expcnt(0) 1217; GFX6-NEXT: ; return to shader part epilog 1218; 1219; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296: 1220; GFX7: ; %bb.0: 1221; GFX7-NEXT: s_mov_b32 s0, 0 1222; GFX7-NEXT: v_mov_b32_e32 v4, v2 1223; GFX7-NEXT: s_mov_b32 s1, 4 1224; GFX7-NEXT: s_mov_b32 s3, 0xf000 1225; GFX7-NEXT: s_mov_b32 s2, s0 1226; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1227; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc 1228; GFX7-NEXT: s_waitcnt vmcnt(0) 
1229; GFX7-NEXT: buffer_wbinvl1 1230; GFX7-NEXT: v_mov_b32_e32 v0, v3 1231; GFX7-NEXT: ; return to shader part epilog 1232 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 1233 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1234 %result = extractvalue { i32, i1 } %result.struct, 0 1235 %cast = bitcast i32 %result to float 1236 ret float %cast 1237} 1238 1239define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) { 1240; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset: 1241; GFX6: ; %bb.0: 1242; GFX6-NEXT: v_mov_b32_e32 v3, v1 1243; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1244; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1245; GFX6-NEXT: s_mov_b32 s0, s2 1246; GFX6-NEXT: s_mov_b32 s1, s3 1247; GFX6-NEXT: s_mov_b32 s2, 0 1248; GFX6-NEXT: s_mov_b32 s3, 0xf000 1249; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1250; GFX6-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc 1251; GFX6-NEXT: s_waitcnt vmcnt(0) 1252; GFX6-NEXT: buffer_wbinvl1 1253; GFX6-NEXT: v_mov_b32_e32 v0, v2 1254; GFX6-NEXT: s_waitcnt expcnt(0) 1255; GFX6-NEXT: ; return to shader part epilog 1256; 1257; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset: 1258; GFX7: ; %bb.0: 1259; GFX7-NEXT: v_mov_b32_e32 v3, v1 1260; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1261; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1262; GFX7-NEXT: s_mov_b32 s0, s2 1263; GFX7-NEXT: s_mov_b32 s1, s3 1264; GFX7-NEXT: s_mov_b32 s2, 0 1265; GFX7-NEXT: s_mov_b32 s3, 0xf000 1266; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1267; GFX7-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc 1268; GFX7-NEXT: s_waitcnt vmcnt(0) 1269; GFX7-NEXT: buffer_wbinvl1 1270; GFX7-NEXT: v_mov_b32_e32 v0, v2 1271; GFX7-NEXT: ; return to shader part epilog 1272 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset 1273 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1274 %result = 
extractvalue { i32, i1 } %result.struct, 0 1275 %cast = bitcast i32 %result to float 1276 ret float %cast 1277} 1278