; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s

; Checks the ISA that llc emits for volatile accesses to global memory
; (addrspace(1)) under the five llc invocations listed above: gfx600, gfx700,
; gfx1010, gfx1010 with +cumode, and gfx700 with
; -amdgcn-skip-cache-invalidations.
;
; The check lines are machine generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Volatile load through a kernel-argument pointer, followed by an ordinary
; store of the loaded value. Every invocation expects "glc" on the load
; ("glc dlc" on gfx1010) and an "s_waitcnt vmcnt(0)" directly after the load.
define amdgpu_kernel void @global_volatile_load_0(
; GFX6-LABEL: global_volatile_load_0:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s0, s4
; GFX6-NEXT:    s_mov_b32 s1, s5
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s2
; GFX6-NEXT:    s_mov_b32 s7, s3
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_volatile_load_0:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_load_dword v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    flat_store_dword v[2:3], v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_volatile_load_0:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_volatile_load_0:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_volatile_load_0:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load volatile i32, i32 addrspace(1)* %in, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Same volatile load as above, but the load address is %in indexed by the
; workitem id in x. The expected cache bits and the wait after the load are
; unchanged.
define amdgpu_kernel void @global_volatile_load_1(
; GFX6-LABEL: global_volatile_load_1:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s0, s6
; GFX6-NEXT:    s_mov_b32 s1, s7
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, s3
; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_volatile_load_1:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, s1
; GFX7-NEXT:    v_add_i32_e32 v2, vcc, s0, v2
; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT:    flat_load_dword v2, v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_volatile_load_1:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v0, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v1, v0, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_volatile_load_1:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v0, v0, s[0:1] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v1, v0, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_volatile_load_1:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, 0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s0, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %val = load volatile i32, i32 addrspace(1)* %val.gep, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Ordinary load followed by a volatile store through a kernel-argument
; pointer. Every invocation expects a wait directly after the store:
; "s_waitcnt vmcnt(0)" on gfx600/gfx700 and "s_waitcnt_vscnt null, 0x0" on
; gfx1010.
define amdgpu_kernel void @global_volatile_store_0(
; GFX6-LABEL: global_volatile_store_0:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_mov_b32 s4, s2
; GFX6-NEXT:    s_mov_b32 s5, s3
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_volatile_store_0:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX7-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s0
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_volatile_store_0:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_volatile_store_0:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_volatile_store_0:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s3
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load i32, i32 addrspace(1)* %in, align 4
  store volatile i32 %val, i32 addrspace(1)* %out
  ret void
}

; Same volatile store as above, but the store address is %out indexed by the
; workitem id in x. The wait expected after the store is unchanged.
define amdgpu_kernel void @global_volatile_store_1(
; GFX6-LABEL: global_volatile_store_1:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_volatile_store_1:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s0
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_volatile_store_1:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_volatile_store_1:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_volatile_store_1:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0
; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, 0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s0
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val = load i32, i32 addrspace(1)* %in, align 4
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  store volatile i32 %val, i32 addrspace(1)* %out.gep
  ret void
}

; Volatile atomic load with syncscope("workgroup") and acquire ordering.
; Only the gfx1010 invocation without +cumode expects "glc" on the load and a
; "buffer_gl0_inv" after the wait; every other invocation expects a load with
; no cache bits and no invalidate.
define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
; GFX6-LABEL: global_volatile_workgroup_acquire_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s0, s4
; GFX6-NEXT:    s_mov_b32 s1, s5
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s2
; GFX6-NEXT:    s_mov_b32 s7, s3
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_volatile_workgroup_acquire_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_load_dword v0, v[0:1]
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[2:3], v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_volatile_workgroup_acquire_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[0:1] glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_volatile_workgroup_acquire_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[0:1]
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_acquire_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic volatile i32, i32 addrspace(1)* %in syncscope("workgroup") acquire, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Volatile atomic store with syncscope("workgroup") and release ordering.
; The gfx1010 invocation without +cumode expects "s_waitcnt vmcnt(0) lgkmcnt(0)"
; and "s_waitcnt_vscnt null, 0x0" before the store; every other invocation
; expects only "s_waitcnt lgkmcnt(0)" before it. No invocation expects a wait
; after the store.
define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX6-LABEL: global_volatile_workgroup_release_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_volatile_workgroup_release_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_volatile_workgroup_release_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_volatile_workgroup_release_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_release_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic volatile i32 %in, i32 addrspace(1)* %out syncscope("workgroup") release, align 4
  ret void
}

; Intrinsic used by the kernels in this file that index a pointer by the
; workitem id in x.
declare i32 @llvm.amdgcn.workitem.id.x()