; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX90A-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX90A-TGSPLIT %s

; Codegen test for atomic operations on addrspace(1) pointers that use
; syncscope("agent"). Each kernel below performs one atomic load, store or
; atomicrmw with a given ordering, and the check lines pin the instruction
; sequence llc emits for every target/feature combination listed in the RUN
; lines above. The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Unordered atomic load from %in at agent scope; the loaded value is written
; to %out with a plain store.
define amdgpu_kernel void @global_agent_unordered_load(
; GFX6-LABEL: global_agent_unordered_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s0, s4
; GFX6-NEXT:    s_mov_b32 s1, s5
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s2
; GFX6-NEXT:    s_mov_b32 s7, s3
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_unordered_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_load_dword v0, v[0:1]
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[2:3], v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_unordered_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[0:1]
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_unordered_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[0:1]
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_unordered_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_unordered_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_unordered_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") unordered, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Monotonic atomic load at agent scope. Compared with the unordered load, the
; checks expect the load instruction to carry glc (glc dlc on gfx1010).
define amdgpu_kernel void @global_agent_monotonic_load(
; GFX6-LABEL: global_agent_monotonic_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s0, s4
; GFX6-NEXT:    s_mov_b32 s1, s5
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s2
; GFX6-NEXT:    s_mov_b32 s7, s3
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_monotonic_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_load_dword v0, v[0:1] glc
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[2:3], v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_monotonic_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_monotonic_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_monotonic_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") monotonic, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Acquire atomic load at agent scope. The checks expect the load to be
; followed by a vmcnt wait and then cache-invalidate instructions, except in
; the run that passes -amdgcn-skip-cache-invalidations, where only the wait is
; expected.
define amdgpu_kernel void @global_agent_acquire_load(
; GFX6-LABEL: global_agent_acquire_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s0, s4
; GFX6-NEXT:    s_mov_b32 s1, s5
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s2
; GFX6-NEXT:    s_mov_b32 s7, s3
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_acquire_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_load_dword v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    flat_store_dword v[2:3], v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acquire_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_acquire_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acquire_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_acquire_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") acquire, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Sequentially consistent atomic load at agent scope. The expected code
; matches the acquire load, with an additional "s_waitcnt vmcnt(0) lgkmcnt(0)"
; before the load (followed by s_waitcnt_vscnt on gfx1010).
define amdgpu_kernel void @global_agent_seq_cst_load(
; GFX6-LABEL: global_agent_seq_cst_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s0, s4
; GFX6-NEXT:    s_mov_b32 s1, s5
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s2
; GFX6-NEXT:    s_mov_b32 s7, s3
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_seq_cst_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dword v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    flat_store_dword v[2:3], v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_seq_cst_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_seq_cst_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[0:1] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent") seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Unordered atomic store of the i32 kernel argument %in to %out at agent
; scope. The checks expect a plain store with no extra waits before it.
define amdgpu_kernel void @global_agent_unordered_store(
; GFX6-LABEL: global_agent_unordered_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s6
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_unordered_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_unordered_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_unordered_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_unordered_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_unordered_store:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_unordered_store:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") unordered, align 4
  ret void
}

; Monotonic atomic store at agent scope. The expected instruction sequences
; are the same as for the unordered store.
define amdgpu_kernel void @global_agent_monotonic_store(
; GFX6-LABEL: global_agent_monotonic_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s6
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_monotonic_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_monotonic_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_monotonic_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_monotonic_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_store:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_store:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") monotonic, align 4
  ret void
}

; Release atomic store at agent scope. The checks expect
; "s_waitcnt vmcnt(0) lgkmcnt(0)" (followed by s_waitcnt_vscnt on gfx1010)
; immediately before the store instruction.
define amdgpu_kernel void @global_agent_release_store(
; GFX6-LABEL: global_agent_release_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s6
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_release_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_release_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_release_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_release_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_store:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_release_store:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") release, align 4
  ret void
}

; Sequentially consistent atomic store at agent scope. The expected
; instruction sequences are the same as for the release store.
define amdgpu_kernel void @global_agent_seq_cst_store(
; GFX6-LABEL: global_agent_seq_cst_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s6
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_seq_cst_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_seq_cst_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_seq_cst_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_store:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_store:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent") seq_cst, align 4
  ret void
}

; Monotonic volatile atomicrmw xchg on %out at agent scope; the result %val is
; unused. The checks expect a single atomic swap instruction with no waits or
; cache invalidates after it.
define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
; GFX6-LABEL: global_agent_monotonic_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_monotonic_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_monotonic_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_monotonic_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_monotonic_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_atomicrmw:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_atomicrmw:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") monotonic
  ret void
}

define amdgpu_kernel void @global_agent_acquire_atomicrmw(
; GFX6-LABEL: global_agent_acquire_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_agent_acquire_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acquire_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_agent_acquire_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acquire_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_atomicrmw:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:
buffer_wbinvl1_vol 907; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 908; 909; GFX90A-TGSPLIT-LABEL: global_agent_acquire_atomicrmw: 910; GFX90A-TGSPLIT: ; %bb.0: ; %entry 911; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 912; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 913; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 914; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 915; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 916; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 917; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 918; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 919; GFX90A-TGSPLIT-NEXT: s_endpgm 920 i32 addrspace(1)* %out, i32 %in) { 921entry: 922 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acquire 923 ret void 924} 925 926define amdgpu_kernel void @global_agent_release_atomicrmw( 927; GFX6-LABEL: global_agent_release_atomicrmw: 928; GFX6: ; %bb.0: ; %entry 929; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 930; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 931; GFX6-NEXT: s_mov_b32 s3, 0x100f000 932; GFX6-NEXT: s_mov_b32 s2, -1 933; GFX6-NEXT: s_waitcnt lgkmcnt(0) 934; GFX6-NEXT: v_mov_b32_e32 v0, s4 935; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 936; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 937; GFX6-NEXT: s_endpgm 938; 939; GFX7-LABEL: global_agent_release_atomicrmw: 940; GFX7: ; %bb.0: ; %entry 941; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 942; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 943; GFX7-NEXT: s_waitcnt lgkmcnt(0) 944; GFX7-NEXT: v_mov_b32_e32 v0, s0 945; GFX7-NEXT: v_mov_b32_e32 v1, s1 946; GFX7-NEXT: v_mov_b32_e32 v2, s2 947; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 948; GFX7-NEXT: flat_atomic_swap v[0:1], v2 949; GFX7-NEXT: s_endpgm 950; 951; GFX10-WGP-LABEL: global_agent_release_atomicrmw: 952; GFX10-WGP: ; %bb.0: ; %entry 953; GFX10-WGP-NEXT: s_clause 0x1 954; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 955; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 956; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 957; 
GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 958; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 959; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 960; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 961; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 962; GFX10-WGP-NEXT: s_endpgm 963; 964; GFX10-CU-LABEL: global_agent_release_atomicrmw: 965; GFX10-CU: ; %bb.0: ; %entry 966; GFX10-CU-NEXT: s_clause 0x1 967; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 968; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 969; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 970; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 971; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 972; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 973; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 974; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 975; GFX10-CU-NEXT: s_endpgm 976; 977; SKIP-CACHE-INV-LABEL: global_agent_release_atomicrmw: 978; SKIP-CACHE-INV: ; %bb.0: ; %entry 979; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 980; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 981; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 982; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 983; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 984; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 985; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 986; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 987; SKIP-CACHE-INV-NEXT: s_endpgm 988; 989; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_atomicrmw: 990; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 991; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 992; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 993; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 994; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 995; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 996; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 997; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 998; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 999; 1000; GFX90A-TGSPLIT-LABEL: global_agent_release_atomicrmw: 1001; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1002; 
GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1003; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1004; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1005; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1006; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1007; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1008; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1009; GFX90A-TGSPLIT-NEXT: s_endpgm 1010 i32 addrspace(1)* %out, i32 %in) { 1011entry: 1012 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") release 1013 ret void 1014} 1015 1016define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( 1017; GFX6-LABEL: global_agent_acq_rel_atomicrmw: 1018; GFX6: ; %bb.0: ; %entry 1019; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1020; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 1021; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1022; GFX6-NEXT: s_mov_b32 s2, -1 1023; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1024; GFX6-NEXT: v_mov_b32_e32 v0, s4 1025; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1026; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1027; GFX6-NEXT: s_waitcnt vmcnt(0) 1028; GFX6-NEXT: buffer_wbinvl1 1029; GFX6-NEXT: s_endpgm 1030; 1031; GFX7-LABEL: global_agent_acq_rel_atomicrmw: 1032; GFX7: ; %bb.0: ; %entry 1033; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1034; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 1035; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1036; GFX7-NEXT: v_mov_b32_e32 v0, s0 1037; GFX7-NEXT: v_mov_b32_e32 v1, s1 1038; GFX7-NEXT: v_mov_b32_e32 v2, s2 1039; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1040; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1041; GFX7-NEXT: s_waitcnt vmcnt(0) 1042; GFX7-NEXT: buffer_wbinvl1_vol 1043; GFX7-NEXT: s_endpgm 1044; 1045; GFX10-WGP-LABEL: global_agent_acq_rel_atomicrmw: 1046; GFX10-WGP: ; %bb.0: ; %entry 1047; GFX10-WGP-NEXT: s_clause 0x1 1048; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 1049; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1050; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1051; GFX10-WGP-NEXT: 
s_waitcnt lgkmcnt(0) 1052; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1053; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1054; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1055; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 1056; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1057; GFX10-WGP-NEXT: buffer_gl0_inv 1058; GFX10-WGP-NEXT: buffer_gl1_inv 1059; GFX10-WGP-NEXT: s_endpgm 1060; 1061; GFX10-CU-LABEL: global_agent_acq_rel_atomicrmw: 1062; GFX10-CU: ; %bb.0: ; %entry 1063; GFX10-CU-NEXT: s_clause 0x1 1064; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 1065; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1066; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1067; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1068; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1069; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1070; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1071; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 1072; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1073; GFX10-CU-NEXT: buffer_gl0_inv 1074; GFX10-CU-NEXT: buffer_gl1_inv 1075; GFX10-CU-NEXT: s_endpgm 1076; 1077; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_atomicrmw: 1078; SKIP-CACHE-INV: ; %bb.0: ; %entry 1079; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1080; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1081; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1082; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1083; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1084; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1085; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1086; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1087; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1088; SKIP-CACHE-INV-NEXT: s_endpgm 1089; 1090; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: 1091; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1092; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1093; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1094; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1095; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1096; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1097; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1098; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1099; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1100; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1101; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1102; 1103; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: 1104; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1105; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1106; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1107; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1108; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1109; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1110; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1111; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1112; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1113; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1114; GFX90A-TGSPLIT-NEXT: s_endpgm 1115 i32 addrspace(1)* %out, i32 %in) { 1116entry: 1117 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acq_rel 1118 ret void 1119} 1120 1121define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( 1122; GFX6-LABEL: global_agent_seq_cst_atomicrmw: 1123; GFX6: ; %bb.0: ; %entry 1124; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1125; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 1126; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1127; GFX6-NEXT: s_mov_b32 s2, -1 1128; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1129; GFX6-NEXT: v_mov_b32_e32 v0, s4 1130; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1131; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1132; GFX6-NEXT: s_waitcnt vmcnt(0) 1133; GFX6-NEXT: buffer_wbinvl1 1134; GFX6-NEXT: s_endpgm 1135; 1136; GFX7-LABEL: global_agent_seq_cst_atomicrmw: 1137; GFX7: ; %bb.0: ; %entry 1138; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1139; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 1140; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1141; GFX7-NEXT: v_mov_b32_e32 v0, s0 1142; GFX7-NEXT: v_mov_b32_e32 v1, s1 1143; GFX7-NEXT: 
v_mov_b32_e32 v2, s2 1144; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1145; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1146; GFX7-NEXT: s_waitcnt vmcnt(0) 1147; GFX7-NEXT: buffer_wbinvl1_vol 1148; GFX7-NEXT: s_endpgm 1149; 1150; GFX10-WGP-LABEL: global_agent_seq_cst_atomicrmw: 1151; GFX10-WGP: ; %bb.0: ; %entry 1152; GFX10-WGP-NEXT: s_clause 0x1 1153; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 1154; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1155; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1156; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1157; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1158; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1159; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1160; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 1161; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1162; GFX10-WGP-NEXT: buffer_gl0_inv 1163; GFX10-WGP-NEXT: buffer_gl1_inv 1164; GFX10-WGP-NEXT: s_endpgm 1165; 1166; GFX10-CU-LABEL: global_agent_seq_cst_atomicrmw: 1167; GFX10-CU: ; %bb.0: ; %entry 1168; GFX10-CU-NEXT: s_clause 0x1 1169; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 1170; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1171; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1172; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1173; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1174; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1175; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1176; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 1177; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1178; GFX10-CU-NEXT: buffer_gl0_inv 1179; GFX10-CU-NEXT: buffer_gl1_inv 1180; GFX10-CU-NEXT: s_endpgm 1181; 1182; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_atomicrmw: 1183; SKIP-CACHE-INV: ; %bb.0: ; %entry 1184; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1185; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1186; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1187; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1188; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1189; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1190; SKIP-CACHE-INV-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 1191; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1192; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1193; SKIP-CACHE-INV-NEXT: s_endpgm 1194; 1195; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: 1196; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1197; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1198; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1199; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1200; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1201; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1202; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1203; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1204; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1205; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1206; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1207; 1208; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: 1209; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1210; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1211; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1212; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1213; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1214; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1215; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1216; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1217; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1218; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1219; GFX90A-TGSPLIT-NEXT: s_endpgm 1220 i32 addrspace(1)* %out, i32 %in) { 1221entry: 1222 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") seq_cst 1223 ret void 1224} 1225 1226define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( 1227; GFX6-LABEL: global_agent_acquire_ret_atomicrmw: 1228; GFX6: ; %bb.0: ; %entry 1229; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1230; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 1231; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1232; GFX6-NEXT: s_mov_b32 s2, -1 1233; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1234; GFX6-NEXT: v_mov_b32_e32 v0, s4 
1235; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 1236; GFX6-NEXT: s_waitcnt vmcnt(0) 1237; GFX6-NEXT: buffer_wbinvl1 1238; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 1239; GFX6-NEXT: s_endpgm 1240; 1241; GFX7-LABEL: global_agent_acquire_ret_atomicrmw: 1242; GFX7: ; %bb.0: ; %entry 1243; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1244; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 1245; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1246; GFX7-NEXT: v_mov_b32_e32 v0, s0 1247; GFX7-NEXT: v_mov_b32_e32 v1, s1 1248; GFX7-NEXT: v_mov_b32_e32 v2, s2 1249; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 1250; GFX7-NEXT: s_waitcnt vmcnt(0) 1251; GFX7-NEXT: buffer_wbinvl1_vol 1252; GFX7-NEXT: flat_store_dword v[0:1], v2 1253; GFX7-NEXT: s_endpgm 1254; 1255; GFX10-WGP-LABEL: global_agent_acquire_ret_atomicrmw: 1256; GFX10-WGP: ; %bb.0: ; %entry 1257; GFX10-WGP-NEXT: s_clause 0x1 1258; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 1259; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1260; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1261; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1262; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1263; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1264; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 1265; GFX10-WGP-NEXT: buffer_gl0_inv 1266; GFX10-WGP-NEXT: buffer_gl1_inv 1267; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 1268; GFX10-WGP-NEXT: s_endpgm 1269; 1270; GFX10-CU-LABEL: global_agent_acquire_ret_atomicrmw: 1271; GFX10-CU: ; %bb.0: ; %entry 1272; GFX10-CU-NEXT: s_clause 0x1 1273; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 1274; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1275; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1276; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1277; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1278; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1279; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 1280; GFX10-CU-NEXT: buffer_gl0_inv 1281; GFX10-CU-NEXT: buffer_gl1_inv 1282; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 1283; GFX10-CU-NEXT: 
s_endpgm 1284; 1285; SKIP-CACHE-INV-LABEL: global_agent_acquire_ret_atomicrmw: 1286; SKIP-CACHE-INV: ; %bb.0: ; %entry 1287; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1288; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1289; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1290; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1291; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1292; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1293; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1294; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1295; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 1296; SKIP-CACHE-INV-NEXT: s_endpgm 1297; 1298; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: 1299; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1300; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1301; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1302; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1303; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1304; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1305; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1306; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1307; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1308; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1309; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1310; 1311; GFX90A-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: 1312; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1313; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1314; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1315; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1316; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1317; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1318; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1319; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1320; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1321; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1322; GFX90A-TGSPLIT-NEXT: s_endpgm 1323 i32 addrspace(1)* %out, i32 %in) { 1324entry: 1325 %val = 
atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acquire 1326 store i32 %val, i32 addrspace(1)* %out, align 4 1327 ret void 1328} 1329 1330define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( 1331; GFX6-LABEL: global_agent_acq_rel_ret_atomicrmw: 1332; GFX6: ; %bb.0: ; %entry 1333; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1334; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 1335; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1336; GFX6-NEXT: s_mov_b32 s2, -1 1337; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1338; GFX6-NEXT: v_mov_b32_e32 v0, s4 1339; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1340; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 1341; GFX6-NEXT: s_waitcnt vmcnt(0) 1342; GFX6-NEXT: buffer_wbinvl1 1343; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 1344; GFX6-NEXT: s_endpgm 1345; 1346; GFX7-LABEL: global_agent_acq_rel_ret_atomicrmw: 1347; GFX7: ; %bb.0: ; %entry 1348; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1349; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 1350; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1351; GFX7-NEXT: v_mov_b32_e32 v0, s0 1352; GFX7-NEXT: v_mov_b32_e32 v1, s1 1353; GFX7-NEXT: v_mov_b32_e32 v2, s2 1354; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1355; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 1356; GFX7-NEXT: s_waitcnt vmcnt(0) 1357; GFX7-NEXT: buffer_wbinvl1_vol 1358; GFX7-NEXT: flat_store_dword v[0:1], v2 1359; GFX7-NEXT: s_endpgm 1360; 1361; GFX10-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw: 1362; GFX10-WGP: ; %bb.0: ; %entry 1363; GFX10-WGP-NEXT: s_clause 0x1 1364; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 1365; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1366; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1367; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1368; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1369; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1370; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1371; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1372; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 1373; GFX10-WGP-NEXT: 
buffer_gl0_inv 1374; GFX10-WGP-NEXT: buffer_gl1_inv 1375; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 1376; GFX10-WGP-NEXT: s_endpgm 1377; 1378; GFX10-CU-LABEL: global_agent_acq_rel_ret_atomicrmw: 1379; GFX10-CU: ; %bb.0: ; %entry 1380; GFX10-CU-NEXT: s_clause 0x1 1381; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 1382; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1383; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1384; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1385; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1386; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1387; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1388; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1389; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 1390; GFX10-CU-NEXT: buffer_gl0_inv 1391; GFX10-CU-NEXT: buffer_gl1_inv 1392; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 1393; GFX10-CU-NEXT: s_endpgm 1394; 1395; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_ret_atomicrmw: 1396; SKIP-CACHE-INV: ; %bb.0: ; %entry 1397; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1398; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1399; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1400; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1401; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1402; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1403; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1404; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1405; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1406; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 1407; SKIP-CACHE-INV-NEXT: s_endpgm 1408; 1409; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: 1410; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1411; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1412; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1413; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1414; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1415; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1416; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 1417; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1418; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1419; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1420; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1421; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1422; 1423; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: 1424; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1425; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1426; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1427; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1428; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1429; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1430; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1431; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1432; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1433; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1434; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1435; GFX90A-TGSPLIT-NEXT: s_endpgm 1436 i32 addrspace(1)* %out, i32 %in) { 1437entry: 1438 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") acq_rel 1439 store i32 %val, i32 addrspace(1)* %out, align 4 1440 ret void 1441} 1442 1443define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( 1444; GFX6-LABEL: global_agent_seq_cst_ret_atomicrmw: 1445; GFX6: ; %bb.0: ; %entry 1446; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1447; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 1448; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1449; GFX6-NEXT: s_mov_b32 s2, -1 1450; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1451; GFX6-NEXT: v_mov_b32_e32 v0, s4 1452; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1453; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 1454; GFX6-NEXT: s_waitcnt vmcnt(0) 1455; GFX6-NEXT: buffer_wbinvl1 1456; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 1457; GFX6-NEXT: s_endpgm 1458; 1459; GFX7-LABEL: global_agent_seq_cst_ret_atomicrmw: 1460; GFX7: ; %bb.0: ; %entry 1461; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1462; 
GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 1463; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1464; GFX7-NEXT: v_mov_b32_e32 v0, s0 1465; GFX7-NEXT: v_mov_b32_e32 v1, s1 1466; GFX7-NEXT: v_mov_b32_e32 v2, s2 1467; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1468; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 1469; GFX7-NEXT: s_waitcnt vmcnt(0) 1470; GFX7-NEXT: buffer_wbinvl1_vol 1471; GFX7-NEXT: flat_store_dword v[0:1], v2 1472; GFX7-NEXT: s_endpgm 1473; 1474; GFX10-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw: 1475; GFX10-WGP: ; %bb.0: ; %entry 1476; GFX10-WGP-NEXT: s_clause 0x1 1477; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 1478; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1479; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1480; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1481; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1482; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1483; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1484; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1485; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 1486; GFX10-WGP-NEXT: buffer_gl0_inv 1487; GFX10-WGP-NEXT: buffer_gl1_inv 1488; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 1489; GFX10-WGP-NEXT: s_endpgm 1490; 1491; GFX10-CU-LABEL: global_agent_seq_cst_ret_atomicrmw: 1492; GFX10-CU: ; %bb.0: ; %entry 1493; GFX10-CU-NEXT: s_clause 0x1 1494; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 1495; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1496; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1497; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1498; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1499; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1500; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1501; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1502; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 1503; GFX10-CU-NEXT: buffer_gl0_inv 1504; GFX10-CU-NEXT: buffer_gl1_inv 1505; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 1506; GFX10-CU-NEXT: s_endpgm 1507; 1508; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_ret_atomicrmw: 1509; SKIP-CACHE-INV: ; %bb.0: ; 
%entry 1510; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1511; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1512; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1513; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1514; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1515; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1516; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1517; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1518; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1519; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 1520; SKIP-CACHE-INV-NEXT: s_endpgm 1521; 1522; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: 1523; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1524; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1525; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1526; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1527; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1528; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1529; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1530; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1531; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1532; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1533; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1534; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1535; 1536; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: 1537; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1538; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1539; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1540; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1541; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1542; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1543; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1544; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1545; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1546; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1547; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1548; GFX90A-TGSPLIT-NEXT: s_endpgm 
i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent") seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "monotonic" and failure ordering "monotonic"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: the atomic is emitted with no extra waits or cache
; invalidations around it (only the s_waitcnt lgkmcnt(0) that follows the
; s_load instructions).
define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_monotonic_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_monotonic_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1]
offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; %out advanced by 4 i32 elements = 16 bytes; this is the offset:16 (or the
  ; explicit s_add_u32 ..., 16 where flat_atomic_cmpswap is used) seen in the
  ; assertions. %val has no users in this function.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "acquire" and failure ordering "monotonic"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: after the atomic there is a wait (s_waitcnt vmcnt(0), or
; s_waitcnt_vscnt null, 0x0 on gfx1010) followed by cache invalidation
; (buffer_wbinvl1, buffer_wbinvl1_vol, or buffer_gl0_inv + buffer_gl1_inv).
; The -amdgcn-skip-cache-invalidations configuration keeps the wait but has
; no invalidate.
define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_acquire_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_acquire_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_acquire_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
;
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; %out advanced by 4 i32 elements = 16 bytes; this is the offset:16 (or the
  ; explicit s_add_u32 ..., 16 where flat_atomic_cmpswap is used) seen in the
  ; assertions. %val has no users in this function.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "release" and failure ordering "monotonic"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: s_waitcnt vmcnt(0) lgkmcnt(0) (plus s_waitcnt_vscnt null, 0x0
; on gfx1010) is placed immediately before the atomic; nothing is added after
; it.
define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_release_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_release_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_release_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_release_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:
buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; %out advanced by 4 i32 elements = 16 bytes; this is the offset:16 (or the
  ; explicit s_add_u32 ..., 16 where flat_atomic_cmpswap is used) seen in the
  ; assertions. %val has no users in this function.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "acq_rel" and failure ordering "monotonic"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: s_waitcnt vmcnt(0) lgkmcnt(0) (plus s_waitcnt_vscnt null, 0x0
; on gfx1010) before the atomic, then a wait and cache invalidation after it.
; The -amdgcn-skip-cache-invalidations configuration keeps both waits but has
; no invalidate.
define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; %out advanced by 4 i32 elements = 16 bytes; this is the offset:16 (or the
  ; explicit s_add_u32 ..., 16 where flat_atomic_cmpswap is used) seen in the
  ; assertions. %val has no users in this function.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "seq_cst" and failure ordering "monotonic"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: s_waitcnt vmcnt(0) lgkmcnt(0) (plus s_waitcnt_vscnt null, 0x0
; on gfx1010) before the atomic, then a wait and cache invalidation after it.
; The -amdgcn-skip-cache-invalidations configuration keeps both waits but has
; no invalidate.
define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
;
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; %out advanced by 4 i32 elements = 16 bytes; this is the offset:16 (or the
  ; explicit s_add_u32 ..., 16 where flat_atomic_cmpswap is used) seen in the
  ; assertions. %val has no users in this function.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "acquire" and failure ordering "acquire"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: after the atomic there is a wait (s_waitcnt vmcnt(0), or
; s_waitcnt_vscnt null, 0x0 on gfx1010) followed by cache invalidation
; (buffer_wbinvl1, buffer_wbinvl1_vol, or buffer_gl0_inv + buffer_gl1_inv).
; The -amdgcn-skip-cache-invalidations configuration keeps the wait but has
; no invalidate.
define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
; GFX6-LABEL: global_agent_acquire_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_acquire_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acquire_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_acquire_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
;
GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; %out advanced by 4 i32 elements = 16 bytes; this is the offset:16 (or the
  ; explicit s_add_u32 ..., 16 where flat_atomic_cmpswap is used) seen in the
  ; assertions. %val has no users in this function.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "release" and failure ordering "acquire"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: s_waitcnt vmcnt(0) lgkmcnt(0) (plus s_waitcnt_vscnt null, 0x0
; on gfx1010) before the atomic, then a wait and cache invalidation after it.
; The -amdgcn-skip-cache-invalidations configuration keeps both waits but has
; no invalidate.
define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
; GFX6-LABEL: global_agent_release_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_release_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_release_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_release_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
;
SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_release_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; %out advanced by 4 i32 elements = 16 bytes; this is the offset:16 (or the
  ; explicit s_add_u32 ..., 16 where flat_atomic_cmpswap is used) seen in the
  ; assertions. %val has no users in this function.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release acquire
  ret void
}

; Agent-scope cmpxchg on a global (addrspace(1)) i32 with success ordering
; "acq_rel" and failure ordering "acquire"; the loaded value has no users.
; The per-target assertions embedded in the signature are autogenerated (see
; the first line of this file): regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; Expected shape: s_waitcnt vmcnt(0) lgkmcnt(0) (plus s_waitcnt_vscnt null, 0x0
; on gfx1010) before the atomic, then a wait and cache invalidation after it.
; The -amdgcn-skip-cache-invalidations configuration keeps both waits but has
; no invalidate.
define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
; GFX6-LABEL: global_agent_acq_rel_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_acq_rel_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
;
GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2387; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2388; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2389; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2390; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2391; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2392; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2393; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2394; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2395; GFX90A-TGSPLIT-NEXT: s_endpgm 2396 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2397entry: 2398 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2399 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire 2400 ret void 2401} 2402 2403define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( 2404; GFX6-LABEL: global_agent_seq_cst_acquire_cmpxchg: 2405; GFX6: ; %bb.0: ; %entry 2406; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2407; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2408; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2409; GFX6-NEXT: s_mov_b32 s2, -1 2410; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2411; GFX6-NEXT: v_mov_b32_e32 v0, s4 2412; GFX6-NEXT: v_mov_b32_e32 v1, s5 2413; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2414; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2415; GFX6-NEXT: s_waitcnt vmcnt(0) 2416; GFX6-NEXT: buffer_wbinvl1 2417; GFX6-NEXT: s_endpgm 2418; 2419; GFX7-LABEL: global_agent_seq_cst_acquire_cmpxchg: 2420; GFX7: ; %bb.0: ; %entry 2421; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2422; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2423; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2424; GFX7-NEXT: s_add_u32 s0, s0, 16 2425; GFX7-NEXT: s_addc_u32 s1, s1, 0 2426; GFX7-NEXT: v_mov_b32_e32 v0, s0 2427; GFX7-NEXT: v_mov_b32_e32 v2, s2 2428; GFX7-NEXT: v_mov_b32_e32 v1, s1 2429; GFX7-NEXT: v_mov_b32_e32 v3, s3 2430; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2431; GFX7-NEXT: 
flat_atomic_cmpswap v[0:1], v[2:3] 2432; GFX7-NEXT: s_waitcnt vmcnt(0) 2433; GFX7-NEXT: buffer_wbinvl1_vol 2434; GFX7-NEXT: s_endpgm 2435; 2436; GFX10-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg: 2437; GFX10-WGP: ; %bb.0: ; %entry 2438; GFX10-WGP-NEXT: s_clause 0x1 2439; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2440; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2441; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2442; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2443; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2444; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2445; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2446; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2447; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2448; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2449; GFX10-WGP-NEXT: buffer_gl0_inv 2450; GFX10-WGP-NEXT: buffer_gl1_inv 2451; GFX10-WGP-NEXT: s_endpgm 2452; 2453; GFX10-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg: 2454; GFX10-CU: ; %bb.0: ; %entry 2455; GFX10-CU-NEXT: s_clause 0x1 2456; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2457; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2458; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2459; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2460; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2461; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2462; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2463; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2464; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2465; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2466; GFX10-CU-NEXT: buffer_gl0_inv 2467; GFX10-CU-NEXT: buffer_gl1_inv 2468; GFX10-CU-NEXT: s_endpgm 2469; 2470; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_cmpxchg: 2471; SKIP-CACHE-INV: ; %bb.0: ; %entry 2472; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2473; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2474; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2475; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2476; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2477; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2478; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2479; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2480; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2481; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2482; SKIP-CACHE-INV-NEXT: s_endpgm 2483; 2484; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: 2485; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2486; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2487; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2488; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2489; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2490; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2491; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2492; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2493; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2494; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2495; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2496; 2497; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: 2498; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2499; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2500; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2501; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2502; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2503; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2504; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2505; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2506; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2507; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2508; GFX90A-TGSPLIT-NEXT: s_endpgm 2509 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2510entry: 2511 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2512 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire 2513 ret void 2514} 2515 2516define amdgpu_kernel void 
@global_agent_seq_cst_seq_cst_cmpxchg( 2517; GFX6-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 2518; GFX6: ; %bb.0: ; %entry 2519; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2520; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2521; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2522; GFX6-NEXT: s_mov_b32 s2, -1 2523; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2524; GFX6-NEXT: v_mov_b32_e32 v0, s4 2525; GFX6-NEXT: v_mov_b32_e32 v1, s5 2526; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2527; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2528; GFX6-NEXT: s_waitcnt vmcnt(0) 2529; GFX6-NEXT: buffer_wbinvl1 2530; GFX6-NEXT: s_endpgm 2531; 2532; GFX7-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 2533; GFX7: ; %bb.0: ; %entry 2534; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2535; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2536; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2537; GFX7-NEXT: s_add_u32 s0, s0, 16 2538; GFX7-NEXT: s_addc_u32 s1, s1, 0 2539; GFX7-NEXT: v_mov_b32_e32 v0, s0 2540; GFX7-NEXT: v_mov_b32_e32 v2, s2 2541; GFX7-NEXT: v_mov_b32_e32 v1, s1 2542; GFX7-NEXT: v_mov_b32_e32 v3, s3 2543; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2544; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2545; GFX7-NEXT: s_waitcnt vmcnt(0) 2546; GFX7-NEXT: buffer_wbinvl1_vol 2547; GFX7-NEXT: s_endpgm 2548; 2549; GFX10-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 2550; GFX10-WGP: ; %bb.0: ; %entry 2551; GFX10-WGP-NEXT: s_clause 0x1 2552; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2553; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2554; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2555; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2556; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2557; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2558; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2559; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2560; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2561; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2562; GFX10-WGP-NEXT: buffer_gl0_inv 2563; GFX10-WGP-NEXT: 
buffer_gl1_inv 2564; GFX10-WGP-NEXT: s_endpgm 2565; 2566; GFX10-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 2567; GFX10-CU: ; %bb.0: ; %entry 2568; GFX10-CU-NEXT: s_clause 0x1 2569; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2570; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2571; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2572; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2573; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2574; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2575; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2576; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2577; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2578; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2579; GFX10-CU-NEXT: buffer_gl0_inv 2580; GFX10-CU-NEXT: buffer_gl1_inv 2581; GFX10-CU-NEXT: s_endpgm 2582; 2583; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 2584; SKIP-CACHE-INV: ; %bb.0: ; %entry 2585; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2586; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2587; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2588; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2589; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2590; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2591; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2592; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2593; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2594; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2595; SKIP-CACHE-INV-NEXT: s_endpgm 2596; 2597; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 2598; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2599; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2600; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2601; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2602; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2603; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2604; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2605; GFX90A-NOTTGSPLIT-NEXT: 
global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2606; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2607; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2608; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2609; 2610; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: 2611; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2612; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2613; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2614; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2615; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2616; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2617; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2618; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2619; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2620; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2621; GFX90A-TGSPLIT-NEXT: s_endpgm 2622 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2623entry: 2624 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2625 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst 2626 ret void 2627} 2628 2629define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( 2630; GFX6-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 2631; GFX6: ; %bb.0: ; %entry 2632; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2633; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2634; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2635; GFX6-NEXT: s_mov_b32 s2, -1 2636; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2637; GFX6-NEXT: v_mov_b32_e32 v0, s4 2638; GFX6-NEXT: v_mov_b32_e32 v1, s5 2639; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 2640; GFX6-NEXT: s_waitcnt vmcnt(0) 2641; GFX6-NEXT: buffer_wbinvl1 2642; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 2643; GFX6-NEXT: s_endpgm 2644; 2645; GFX7-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 2646; GFX7: ; %bb.0: ; %entry 2647; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2648; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 
2649; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2650; GFX7-NEXT: s_add_u32 s4, s0, 16 2651; GFX7-NEXT: s_addc_u32 s5, s1, 0 2652; GFX7-NEXT: v_mov_b32_e32 v0, s4 2653; GFX7-NEXT: v_mov_b32_e32 v2, s2 2654; GFX7-NEXT: v_mov_b32_e32 v1, s5 2655; GFX7-NEXT: v_mov_b32_e32 v3, s3 2656; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2657; GFX7-NEXT: s_waitcnt vmcnt(0) 2658; GFX7-NEXT: buffer_wbinvl1_vol 2659; GFX7-NEXT: v_mov_b32_e32 v0, s0 2660; GFX7-NEXT: v_mov_b32_e32 v1, s1 2661; GFX7-NEXT: flat_store_dword v[0:1], v2 2662; GFX7-NEXT: s_endpgm 2663; 2664; GFX10-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 2665; GFX10-WGP: ; %bb.0: ; %entry 2666; GFX10-WGP-NEXT: s_clause 0x1 2667; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2668; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2669; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2670; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2671; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2672; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2673; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2674; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2675; GFX10-WGP-NEXT: buffer_gl0_inv 2676; GFX10-WGP-NEXT: buffer_gl1_inv 2677; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2678; GFX10-WGP-NEXT: s_endpgm 2679; 2680; GFX10-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 2681; GFX10-CU: ; %bb.0: ; %entry 2682; GFX10-CU-NEXT: s_clause 0x1 2683; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2684; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2685; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2686; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2687; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2688; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2689; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2690; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2691; GFX10-CU-NEXT: buffer_gl0_inv 2692; GFX10-CU-NEXT: buffer_gl1_inv 2693; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2694; GFX10-CU-NEXT: s_endpgm 2695; 2696; SKIP-CACHE-INV-LABEL: 
global_agent_acquire_monotonic_ret_cmpxchg: 2697; SKIP-CACHE-INV: ; %bb.0: ; %entry 2698; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2699; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2700; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2701; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2702; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2703; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2704; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2705; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2706; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2707; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2708; SKIP-CACHE-INV-NEXT: s_endpgm 2709; 2710; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 2711; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2712; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2713; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2714; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2715; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2716; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2717; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2718; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2719; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2720; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2721; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2722; 2723; GFX90A-TGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: 2724; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2725; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2726; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2727; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2728; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2729; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2730; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2731; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2732; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2733; 
GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2734; GFX90A-TGSPLIT-NEXT: s_endpgm 2735 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2736entry: 2737 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2738 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic 2739 %val0 = extractvalue { i32, i1 } %val, 0 2740 store i32 %val0, i32 addrspace(1)* %out, align 4 2741 ret void 2742} 2743 2744define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( 2745; GFX6-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 2746; GFX6: ; %bb.0: ; %entry 2747; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2748; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2749; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2750; GFX6-NEXT: s_mov_b32 s2, -1 2751; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2752; GFX6-NEXT: v_mov_b32_e32 v0, s4 2753; GFX6-NEXT: v_mov_b32_e32 v1, s5 2754; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2755; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 2756; GFX6-NEXT: s_waitcnt vmcnt(0) 2757; GFX6-NEXT: buffer_wbinvl1 2758; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 2759; GFX6-NEXT: s_endpgm 2760; 2761; GFX7-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 2762; GFX7: ; %bb.0: ; %entry 2763; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2764; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2765; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2766; GFX7-NEXT: s_add_u32 s4, s0, 16 2767; GFX7-NEXT: s_addc_u32 s5, s1, 0 2768; GFX7-NEXT: v_mov_b32_e32 v0, s4 2769; GFX7-NEXT: v_mov_b32_e32 v2, s2 2770; GFX7-NEXT: v_mov_b32_e32 v1, s5 2771; GFX7-NEXT: v_mov_b32_e32 v3, s3 2772; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2773; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2774; GFX7-NEXT: s_waitcnt vmcnt(0) 2775; GFX7-NEXT: buffer_wbinvl1_vol 2776; GFX7-NEXT: v_mov_b32_e32 v0, s0 2777; GFX7-NEXT: v_mov_b32_e32 v1, s1 2778; GFX7-NEXT: flat_store_dword v[0:1], v2 2779; GFX7-NEXT: s_endpgm 2780; 2781; 
GFX10-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 2782; GFX10-WGP: ; %bb.0: ; %entry 2783; GFX10-WGP-NEXT: s_clause 0x1 2784; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2785; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2786; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2787; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2788; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2789; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2790; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2791; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2792; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2793; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2794; GFX10-WGP-NEXT: buffer_gl0_inv 2795; GFX10-WGP-NEXT: buffer_gl1_inv 2796; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2797; GFX10-WGP-NEXT: s_endpgm 2798; 2799; GFX10-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 2800; GFX10-CU: ; %bb.0: ; %entry 2801; GFX10-CU-NEXT: s_clause 0x1 2802; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2803; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2804; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2805; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2806; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2807; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2808; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2809; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2810; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2811; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2812; GFX10-CU-NEXT: buffer_gl0_inv 2813; GFX10-CU-NEXT: buffer_gl1_inv 2814; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2815; GFX10-CU-NEXT: s_endpgm 2816; 2817; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 2818; SKIP-CACHE-INV: ; %bb.0: ; %entry 2819; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2820; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2821; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2822; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2823; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2824; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2825; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2826; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2827; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2828; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2829; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2830; SKIP-CACHE-INV-NEXT: s_endpgm 2831; 2832; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 2833; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2834; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2835; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2836; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2837; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2838; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2839; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2840; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2841; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2842; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2843; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2844; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2845; 2846; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: 2847; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2848; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2849; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2850; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2851; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2852; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2853; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2854; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2855; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2856; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2857; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2858; GFX90A-TGSPLIT-NEXT: s_endpgm 2859 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2860entry: 2861 %gep = getelementptr 
i32, i32 addrspace(1)* %out, i32 4 2862 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic 2863 %val0 = extractvalue { i32, i1 } %val, 0 2864 store i32 %val0, i32 addrspace(1)* %out, align 4 2865 ret void 2866} 2867 2868define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( 2869; GFX6-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 2870; GFX6: ; %bb.0: ; %entry 2871; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2872; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2873; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2874; GFX6-NEXT: s_mov_b32 s2, -1 2875; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2876; GFX6-NEXT: v_mov_b32_e32 v0, s4 2877; GFX6-NEXT: v_mov_b32_e32 v1, s5 2878; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2879; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 2880; GFX6-NEXT: s_waitcnt vmcnt(0) 2881; GFX6-NEXT: buffer_wbinvl1 2882; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 2883; GFX6-NEXT: s_endpgm 2884; 2885; GFX7-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 2886; GFX7: ; %bb.0: ; %entry 2887; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2888; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2889; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2890; GFX7-NEXT: s_add_u32 s4, s0, 16 2891; GFX7-NEXT: s_addc_u32 s5, s1, 0 2892; GFX7-NEXT: v_mov_b32_e32 v0, s4 2893; GFX7-NEXT: v_mov_b32_e32 v2, s2 2894; GFX7-NEXT: v_mov_b32_e32 v1, s5 2895; GFX7-NEXT: v_mov_b32_e32 v3, s3 2896; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2897; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2898; GFX7-NEXT: s_waitcnt vmcnt(0) 2899; GFX7-NEXT: buffer_wbinvl1_vol 2900; GFX7-NEXT: v_mov_b32_e32 v0, s0 2901; GFX7-NEXT: v_mov_b32_e32 v1, s1 2902; GFX7-NEXT: flat_store_dword v[0:1], v2 2903; GFX7-NEXT: s_endpgm 2904; 2905; GFX10-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 2906; GFX10-WGP: ; %bb.0: ; %entry 2907; GFX10-WGP-NEXT: s_clause 0x1 2908; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2909; 
GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2910; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2911; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2912; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2913; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2914; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2915; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2916; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2917; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2918; GFX10-WGP-NEXT: buffer_gl0_inv 2919; GFX10-WGP-NEXT: buffer_gl1_inv 2920; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2921; GFX10-WGP-NEXT: s_endpgm 2922; 2923; GFX10-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 2924; GFX10-CU: ; %bb.0: ; %entry 2925; GFX10-CU-NEXT: s_clause 0x1 2926; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2927; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2928; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2929; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2930; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2931; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2932; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2933; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2934; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2935; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2936; GFX10-CU-NEXT: buffer_gl0_inv 2937; GFX10-CU-NEXT: buffer_gl1_inv 2938; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2939; GFX10-CU-NEXT: s_endpgm 2940; 2941; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 2942; SKIP-CACHE-INV: ; %bb.0: ; %entry 2943; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2944; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2945; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2946; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2947; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2948; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2949; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2950; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2951; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, 
s[4:7], 0 offset:16 glc 2952; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2953; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2954; SKIP-CACHE-INV-NEXT: s_endpgm 2955; 2956; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 2957; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2958; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2959; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2960; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2961; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2962; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2963; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2964; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2965; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2966; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2967; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2968; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2969; 2970; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: 2971; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2972; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2973; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2974; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2975; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2976; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2977; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2978; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2979; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2980; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2981; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2982; GFX90A-TGSPLIT-NEXT: s_endpgm 2983 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2984entry: 2985 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2986 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic 2987 %val0 = extractvalue { i32, i1 } %val, 0 2988 store i32 %val0, 
i32 addrspace(1)* %out, align 4 2989 ret void 2990} 2991 2992define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( 2993; GFX6-LABEL: global_agent_acquire_acquire_ret_cmpxchg: 2994; GFX6: ; %bb.0: ; %entry 2995; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2996; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2997; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2998; GFX6-NEXT: s_mov_b32 s2, -1 2999; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3000; GFX6-NEXT: v_mov_b32_e32 v0, s4 3001; GFX6-NEXT: v_mov_b32_e32 v1, s5 3002; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3003; GFX6-NEXT: s_waitcnt vmcnt(0) 3004; GFX6-NEXT: buffer_wbinvl1 3005; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3006; GFX6-NEXT: s_endpgm 3007; 3008; GFX7-LABEL: global_agent_acquire_acquire_ret_cmpxchg: 3009; GFX7: ; %bb.0: ; %entry 3010; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3011; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3012; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3013; GFX7-NEXT: s_add_u32 s4, s0, 16 3014; GFX7-NEXT: s_addc_u32 s5, s1, 0 3015; GFX7-NEXT: v_mov_b32_e32 v0, s4 3016; GFX7-NEXT: v_mov_b32_e32 v2, s2 3017; GFX7-NEXT: v_mov_b32_e32 v1, s5 3018; GFX7-NEXT: v_mov_b32_e32 v3, s3 3019; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3020; GFX7-NEXT: s_waitcnt vmcnt(0) 3021; GFX7-NEXT: buffer_wbinvl1_vol 3022; GFX7-NEXT: v_mov_b32_e32 v0, s0 3023; GFX7-NEXT: v_mov_b32_e32 v1, s1 3024; GFX7-NEXT: flat_store_dword v[0:1], v2 3025; GFX7-NEXT: s_endpgm 3026; 3027; GFX10-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg: 3028; GFX10-WGP: ; %bb.0: ; %entry 3029; GFX10-WGP-NEXT: s_clause 0x1 3030; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3031; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3032; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3033; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3034; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3035; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3036; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3037; 
GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3038; GFX10-WGP-NEXT: buffer_gl0_inv 3039; GFX10-WGP-NEXT: buffer_gl1_inv 3040; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3041; GFX10-WGP-NEXT: s_endpgm 3042; 3043; GFX10-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg: 3044; GFX10-CU: ; %bb.0: ; %entry 3045; GFX10-CU-NEXT: s_clause 0x1 3046; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3047; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3048; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3049; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3050; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3051; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3052; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3053; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3054; GFX10-CU-NEXT: buffer_gl0_inv 3055; GFX10-CU-NEXT: buffer_gl1_inv 3056; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3057; GFX10-CU-NEXT: s_endpgm 3058; 3059; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_ret_cmpxchg: 3060; SKIP-CACHE-INV: ; %bb.0: ; %entry 3061; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3062; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3063; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3064; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3065; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3066; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3067; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3068; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3069; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3070; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3071; SKIP-CACHE-INV-NEXT: s_endpgm 3072; 3073; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg: 3074; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3075; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3076; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3077; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3078; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3079; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 
v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3080; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3081; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3082; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3083; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3084; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3085; 3086; GFX90A-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg: 3087; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3088; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3089; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3090; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3091; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3092; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3093; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3094; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3095; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3096; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3097; GFX90A-TGSPLIT-NEXT: s_endpgm 3098 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3099entry: 3100 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3101 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire 3102 %val0 = extractvalue { i32, i1 } %val, 0 3103 store i32 %val0, i32 addrspace(1)* %out, align 4 3104 ret void 3105} 3106 3107define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( 3108; GFX6-LABEL: global_agent_release_acquire_ret_cmpxchg: 3109; GFX6: ; %bb.0: ; %entry 3110; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3111; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3112; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3113; GFX6-NEXT: s_mov_b32 s2, -1 3114; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3115; GFX6-NEXT: v_mov_b32_e32 v0, s4 3116; GFX6-NEXT: v_mov_b32_e32 v1, s5 3117; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3118; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3119; GFX6-NEXT: s_waitcnt vmcnt(0) 3120; GFX6-NEXT: 
buffer_wbinvl1 3121; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3122; GFX6-NEXT: s_endpgm 3123; 3124; GFX7-LABEL: global_agent_release_acquire_ret_cmpxchg: 3125; GFX7: ; %bb.0: ; %entry 3126; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3127; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3128; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3129; GFX7-NEXT: s_add_u32 s4, s0, 16 3130; GFX7-NEXT: s_addc_u32 s5, s1, 0 3131; GFX7-NEXT: v_mov_b32_e32 v0, s4 3132; GFX7-NEXT: v_mov_b32_e32 v2, s2 3133; GFX7-NEXT: v_mov_b32_e32 v1, s5 3134; GFX7-NEXT: v_mov_b32_e32 v3, s3 3135; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3136; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3137; GFX7-NEXT: s_waitcnt vmcnt(0) 3138; GFX7-NEXT: buffer_wbinvl1_vol 3139; GFX7-NEXT: v_mov_b32_e32 v0, s0 3140; GFX7-NEXT: v_mov_b32_e32 v1, s1 3141; GFX7-NEXT: flat_store_dword v[0:1], v2 3142; GFX7-NEXT: s_endpgm 3143; 3144; GFX10-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg: 3145; GFX10-WGP: ; %bb.0: ; %entry 3146; GFX10-WGP-NEXT: s_clause 0x1 3147; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3148; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3149; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3150; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3151; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3152; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3153; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3154; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3155; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3156; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3157; GFX10-WGP-NEXT: buffer_gl0_inv 3158; GFX10-WGP-NEXT: buffer_gl1_inv 3159; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3160; GFX10-WGP-NEXT: s_endpgm 3161; 3162; GFX10-CU-LABEL: global_agent_release_acquire_ret_cmpxchg: 3163; GFX10-CU: ; %bb.0: ; %entry 3164; GFX10-CU-NEXT: s_clause 0x1 3165; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3166; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3167; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3168; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3169; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3170; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3171; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3172; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3173; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3174; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3175; GFX10-CU-NEXT: buffer_gl0_inv 3176; GFX10-CU-NEXT: buffer_gl1_inv 3177; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3178; GFX10-CU-NEXT: s_endpgm 3179; 3180; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_ret_cmpxchg: 3181; SKIP-CACHE-INV: ; %bb.0: ; %entry 3182; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3183; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3184; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3185; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3186; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3187; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3188; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3189; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3190; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3191; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3192; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3193; SKIP-CACHE-INV-NEXT: s_endpgm 3194; 3195; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg: 3196; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3197; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3198; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3199; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3200; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3201; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3202; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3203; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3204; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3205; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3206; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, 
s[0:1] 3207; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3208; 3209; GFX90A-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg: 3210; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3211; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3212; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3213; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3214; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3215; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3216; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3217; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3218; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3219; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3220; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3221; GFX90A-TGSPLIT-NEXT: s_endpgm 3222 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3223entry: 3224 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3225 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") release acquire 3226 %val0 = extractvalue { i32, i1 } %val, 0 3227 store i32 %val0, i32 addrspace(1)* %out, align 4 3228 ret void 3229} 3230 3231define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( 3232; GFX6-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 3233; GFX6: ; %bb.0: ; %entry 3234; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3235; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3236; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3237; GFX6-NEXT: s_mov_b32 s2, -1 3238; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3239; GFX6-NEXT: v_mov_b32_e32 v0, s4 3240; GFX6-NEXT: v_mov_b32_e32 v1, s5 3241; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3242; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3243; GFX6-NEXT: s_waitcnt vmcnt(0) 3244; GFX6-NEXT: buffer_wbinvl1 3245; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3246; GFX6-NEXT: s_endpgm 3247; 3248; GFX7-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 3249; GFX7: ; %bb.0: ; %entry 3250; GFX7-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x0 3251; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3252; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3253; GFX7-NEXT: s_add_u32 s4, s0, 16 3254; GFX7-NEXT: s_addc_u32 s5, s1, 0 3255; GFX7-NEXT: v_mov_b32_e32 v0, s4 3256; GFX7-NEXT: v_mov_b32_e32 v2, s2 3257; GFX7-NEXT: v_mov_b32_e32 v1, s5 3258; GFX7-NEXT: v_mov_b32_e32 v3, s3 3259; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3260; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3261; GFX7-NEXT: s_waitcnt vmcnt(0) 3262; GFX7-NEXT: buffer_wbinvl1_vol 3263; GFX7-NEXT: v_mov_b32_e32 v0, s0 3264; GFX7-NEXT: v_mov_b32_e32 v1, s1 3265; GFX7-NEXT: flat_store_dword v[0:1], v2 3266; GFX7-NEXT: s_endpgm 3267; 3268; GFX10-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 3269; GFX10-WGP: ; %bb.0: ; %entry 3270; GFX10-WGP-NEXT: s_clause 0x1 3271; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3272; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3273; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3274; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3275; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3276; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3277; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3278; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3279; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3280; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3281; GFX10-WGP-NEXT: buffer_gl0_inv 3282; GFX10-WGP-NEXT: buffer_gl1_inv 3283; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3284; GFX10-WGP-NEXT: s_endpgm 3285; 3286; GFX10-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 3287; GFX10-CU: ; %bb.0: ; %entry 3288; GFX10-CU-NEXT: s_clause 0x1 3289; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3290; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3291; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3292; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3293; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3294; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3295; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3296; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3297; 
GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3298; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3299; GFX10-CU-NEXT: buffer_gl0_inv 3300; GFX10-CU-NEXT: buffer_gl1_inv 3301; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3302; GFX10-CU-NEXT: s_endpgm 3303; 3304; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 3305; SKIP-CACHE-INV: ; %bb.0: ; %entry 3306; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3307; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3308; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3309; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3310; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3311; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3312; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3313; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3314; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3315; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3316; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3317; SKIP-CACHE-INV-NEXT: s_endpgm 3318; 3319; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 3320; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3321; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3322; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3323; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3324; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3325; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3326; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3327; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3328; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3329; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3330; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3331; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3332; 3333; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: 3334; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3335; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 
3336; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3337; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3338; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3339; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3340; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3341; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3342; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3343; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3344; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3345; GFX90A-TGSPLIT-NEXT: s_endpgm 3346 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3347entry: 3348 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3349 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire 3350 %val0 = extractvalue { i32, i1 } %val, 0 3351 store i32 %val0, i32 addrspace(1)* %out, align 4 3352 ret void 3353} 3354 3355define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( 3356; GFX6-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 3357; GFX6: ; %bb.0: ; %entry 3358; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3359; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3360; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3361; GFX6-NEXT: s_mov_b32 s2, -1 3362; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3363; GFX6-NEXT: v_mov_b32_e32 v0, s4 3364; GFX6-NEXT: v_mov_b32_e32 v1, s5 3365; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3366; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3367; GFX6-NEXT: s_waitcnt vmcnt(0) 3368; GFX6-NEXT: buffer_wbinvl1 3369; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3370; GFX6-NEXT: s_endpgm 3371; 3372; GFX7-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 3373; GFX7: ; %bb.0: ; %entry 3374; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3375; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3376; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3377; GFX7-NEXT: s_add_u32 s4, s0, 16 3378; GFX7-NEXT: s_addc_u32 s5, s1, 0 3379; GFX7-NEXT: v_mov_b32_e32 v0, s4 
3380; GFX7-NEXT: v_mov_b32_e32 v2, s2 3381; GFX7-NEXT: v_mov_b32_e32 v1, s5 3382; GFX7-NEXT: v_mov_b32_e32 v3, s3 3383; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3384; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3385; GFX7-NEXT: s_waitcnt vmcnt(0) 3386; GFX7-NEXT: buffer_wbinvl1_vol 3387; GFX7-NEXT: v_mov_b32_e32 v0, s0 3388; GFX7-NEXT: v_mov_b32_e32 v1, s1 3389; GFX7-NEXT: flat_store_dword v[0:1], v2 3390; GFX7-NEXT: s_endpgm 3391; 3392; GFX10-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 3393; GFX10-WGP: ; %bb.0: ; %entry 3394; GFX10-WGP-NEXT: s_clause 0x1 3395; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3396; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3397; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3398; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3399; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3400; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3401; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3402; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3403; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3404; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3405; GFX10-WGP-NEXT: buffer_gl0_inv 3406; GFX10-WGP-NEXT: buffer_gl1_inv 3407; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3408; GFX10-WGP-NEXT: s_endpgm 3409; 3410; GFX10-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 3411; GFX10-CU: ; %bb.0: ; %entry 3412; GFX10-CU-NEXT: s_clause 0x1 3413; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3414; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3415; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3416; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3417; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3418; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3419; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3420; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3421; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3422; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3423; GFX10-CU-NEXT: buffer_gl0_inv 3424; GFX10-CU-NEXT: buffer_gl1_inv 3425; GFX10-CU-NEXT: global_store_dword v2, 
v0, s[2:3] 3426; GFX10-CU-NEXT: s_endpgm 3427; 3428; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 3429; SKIP-CACHE-INV: ; %bb.0: ; %entry 3430; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3431; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3432; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3433; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3434; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3435; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3436; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3437; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3438; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3439; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3440; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3441; SKIP-CACHE-INV-NEXT: s_endpgm 3442; 3443; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 3444; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3445; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3446; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3447; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3448; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3449; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3450; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3451; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3452; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3453; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3454; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3455; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3456; 3457; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: 3458; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3459; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3460; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3461; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3462; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3463; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 
3464; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3465; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3466; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3467; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3468; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3469; GFX90A-TGSPLIT-NEXT: s_endpgm 3470 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3471entry: 3472 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3473 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire 3474 %val0 = extractvalue { i32, i1 } %val, 0 3475 store i32 %val0, i32 addrspace(1)* %out, align 4 3476 ret void 3477} 3478 3479define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( 3480; GFX6-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 3481; GFX6: ; %bb.0: ; %entry 3482; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3483; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3484; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3485; GFX6-NEXT: s_mov_b32 s2, -1 3486; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3487; GFX6-NEXT: v_mov_b32_e32 v0, s4 3488; GFX6-NEXT: v_mov_b32_e32 v1, s5 3489; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3490; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3491; GFX6-NEXT: s_waitcnt vmcnt(0) 3492; GFX6-NEXT: buffer_wbinvl1 3493; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3494; GFX6-NEXT: s_endpgm 3495; 3496; GFX7-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 3497; GFX7: ; %bb.0: ; %entry 3498; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3499; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3500; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3501; GFX7-NEXT: s_add_u32 s4, s0, 16 3502; GFX7-NEXT: s_addc_u32 s5, s1, 0 3503; GFX7-NEXT: v_mov_b32_e32 v0, s4 3504; GFX7-NEXT: v_mov_b32_e32 v2, s2 3505; GFX7-NEXT: v_mov_b32_e32 v1, s5 3506; GFX7-NEXT: v_mov_b32_e32 v3, s3 3507; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3508; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3509; 
GFX7-NEXT: s_waitcnt vmcnt(0) 3510; GFX7-NEXT: buffer_wbinvl1_vol 3511; GFX7-NEXT: v_mov_b32_e32 v0, s0 3512; GFX7-NEXT: v_mov_b32_e32 v1, s1 3513; GFX7-NEXT: flat_store_dword v[0:1], v2 3514; GFX7-NEXT: s_endpgm 3515; 3516; GFX10-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 3517; GFX10-WGP: ; %bb.0: ; %entry 3518; GFX10-WGP-NEXT: s_clause 0x1 3519; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3520; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3521; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3522; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3523; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3524; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3525; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3526; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3527; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3528; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3529; GFX10-WGP-NEXT: buffer_gl0_inv 3530; GFX10-WGP-NEXT: buffer_gl1_inv 3531; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3532; GFX10-WGP-NEXT: s_endpgm 3533; 3534; GFX10-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 3535; GFX10-CU: ; %bb.0: ; %entry 3536; GFX10-CU-NEXT: s_clause 0x1 3537; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3538; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3539; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3540; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3541; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3542; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3543; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3544; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3545; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3546; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3547; GFX10-CU-NEXT: buffer_gl0_inv 3548; GFX10-CU-NEXT: buffer_gl1_inv 3549; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3550; GFX10-CU-NEXT: s_endpgm 3551; 3552; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 3553; SKIP-CACHE-INV: ; %bb.0: ; %entry 3554; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3555; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3556; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3557; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3558; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3559; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3560; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3561; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3562; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3563; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3564; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3565; SKIP-CACHE-INV-NEXT: s_endpgm 3566; 3567; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 3568; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3569; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3570; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3571; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3572; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3573; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3574; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3575; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3576; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3577; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3578; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3579; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3580; 3581; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: 3582; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3583; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3584; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3585; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3586; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3587; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3588; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3589; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3590; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3591; GFX90A-TGSPLIT-NEXT: 
buffer_wbinvl1_vol 3592; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3593; GFX90A-TGSPLIT-NEXT: s_endpgm 3594 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3595entry: 3596 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3597 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst 3598 %val0 = extractvalue { i32, i1 } %val, 0 3599 store i32 %val0, i32 addrspace(1)* %out, align 4 3600 ret void 3601} 3602 3603define amdgpu_kernel void @global_agent_one_as_unordered_load( 3604; GFX6-LABEL: global_agent_one_as_unordered_load: 3605; GFX6: ; %bb.0: ; %entry 3606; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3607; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3608; GFX6-NEXT: s_mov_b32 s2, -1 3609; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3610; GFX6-NEXT: s_mov_b32 s0, s4 3611; GFX6-NEXT: s_mov_b32 s1, s5 3612; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 3613; GFX6-NEXT: s_mov_b32 s4, s6 3614; GFX6-NEXT: s_mov_b32 s5, s7 3615; GFX6-NEXT: s_mov_b32 s6, s2 3616; GFX6-NEXT: s_mov_b32 s7, s3 3617; GFX6-NEXT: s_waitcnt vmcnt(0) 3618; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3619; GFX6-NEXT: s_endpgm 3620; 3621; GFX7-LABEL: global_agent_one_as_unordered_load: 3622; GFX7: ; %bb.0: ; %entry 3623; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3624; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3625; GFX7-NEXT: v_mov_b32_e32 v0, s0 3626; GFX7-NEXT: v_mov_b32_e32 v1, s1 3627; GFX7-NEXT: flat_load_dword v0, v[0:1] 3628; GFX7-NEXT: v_mov_b32_e32 v2, s2 3629; GFX7-NEXT: v_mov_b32_e32 v3, s3 3630; GFX7-NEXT: s_waitcnt vmcnt(0) 3631; GFX7-NEXT: flat_store_dword v[2:3], v0 3632; GFX7-NEXT: s_endpgm 3633; 3634; GFX10-WGP-LABEL: global_agent_one_as_unordered_load: 3635; GFX10-WGP: ; %bb.0: ; %entry 3636; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3637; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3638; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3639; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] 3640; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3641; 
GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3642; GFX10-WGP-NEXT: s_endpgm 3643; 3644; GFX10-CU-LABEL: global_agent_one_as_unordered_load: 3645; GFX10-CU: ; %bb.0: ; %entry 3646; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3647; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3648; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3649; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] 3650; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3651; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3652; GFX10-CU-NEXT: s_endpgm 3653; 3654; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_load: 3655; SKIP-CACHE-INV: ; %bb.0: ; %entry 3656; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3657; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3658; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3659; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3660; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3661; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3662; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 3663; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3664; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3665; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3666; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3667; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3668; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3669; SKIP-CACHE-INV-NEXT: s_endpgm 3670; 3671; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_load: 3672; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3673; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3674; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3675; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3676; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] 3677; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3678; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3679; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3680; 3681; GFX90A-TGSPLIT-LABEL: global_agent_one_as_unordered_load: 3682; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3683; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3684; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 
3685; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3686; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] 3687; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3688; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3689; GFX90A-TGSPLIT-NEXT: s_endpgm 3690 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3691entry: 3692 %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") unordered, align 4 3693 store i32 %val, i32 addrspace(1)* %out 3694 ret void 3695} 3696 3697define amdgpu_kernel void @global_agent_one_as_monotonic_load( 3698; GFX6-LABEL: global_agent_one_as_monotonic_load: 3699; GFX6: ; %bb.0: ; %entry 3700; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3701; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3702; GFX6-NEXT: s_mov_b32 s2, -1 3703; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3704; GFX6-NEXT: s_mov_b32 s0, s4 3705; GFX6-NEXT: s_mov_b32 s1, s5 3706; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3707; GFX6-NEXT: s_mov_b32 s4, s6 3708; GFX6-NEXT: s_mov_b32 s5, s7 3709; GFX6-NEXT: s_mov_b32 s6, s2 3710; GFX6-NEXT: s_mov_b32 s7, s3 3711; GFX6-NEXT: s_waitcnt vmcnt(0) 3712; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3713; GFX6-NEXT: s_endpgm 3714; 3715; GFX7-LABEL: global_agent_one_as_monotonic_load: 3716; GFX7: ; %bb.0: ; %entry 3717; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3718; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3719; GFX7-NEXT: v_mov_b32_e32 v0, s0 3720; GFX7-NEXT: v_mov_b32_e32 v1, s1 3721; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 3722; GFX7-NEXT: v_mov_b32_e32 v2, s2 3723; GFX7-NEXT: v_mov_b32_e32 v3, s3 3724; GFX7-NEXT: s_waitcnt vmcnt(0) 3725; GFX7-NEXT: flat_store_dword v[2:3], v0 3726; GFX7-NEXT: s_endpgm 3727; 3728; GFX10-WGP-LABEL: global_agent_one_as_monotonic_load: 3729; GFX10-WGP: ; %bb.0: ; %entry 3730; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3731; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3732; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3733; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3734; GFX10-WGP-NEXT: s_waitcnt 
vmcnt(0) 3735; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3736; GFX10-WGP-NEXT: s_endpgm 3737; 3738; GFX10-CU-LABEL: global_agent_one_as_monotonic_load: 3739; GFX10-CU: ; %bb.0: ; %entry 3740; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3741; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3742; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3743; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3744; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3745; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3746; GFX10-CU-NEXT: s_endpgm 3747; 3748; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_load: 3749; SKIP-CACHE-INV: ; %bb.0: ; %entry 3750; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3751; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3752; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3753; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3754; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3755; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3756; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3757; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3758; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3759; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3760; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3761; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3762; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3763; SKIP-CACHE-INV-NEXT: s_endpgm 3764; 3765; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_load: 3766; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3767; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3768; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3769; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3770; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3771; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3772; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3773; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3774; 3775; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_load: 3776; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3777; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3778; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3779; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3780; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3781; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3782; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3783; GFX90A-TGSPLIT-NEXT: s_endpgm 3784 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3785entry: 3786 %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") monotonic, align 4 3787 store i32 %val, i32 addrspace(1)* %out 3788 ret void 3789} 3790 3791define amdgpu_kernel void @global_agent_one_as_acquire_load( 3792; GFX6-LABEL: global_agent_one_as_acquire_load: 3793; GFX6: ; %bb.0: ; %entry 3794; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3795; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3796; GFX6-NEXT: s_mov_b32 s2, -1 3797; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3798; GFX6-NEXT: s_mov_b32 s0, s4 3799; GFX6-NEXT: s_mov_b32 s1, s5 3800; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3801; GFX6-NEXT: s_waitcnt vmcnt(0) 3802; GFX6-NEXT: buffer_wbinvl1 3803; GFX6-NEXT: s_mov_b32 s4, s6 3804; GFX6-NEXT: s_mov_b32 s5, s7 3805; GFX6-NEXT: s_mov_b32 s6, s2 3806; GFX6-NEXT: s_mov_b32 s7, s3 3807; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3808; GFX6-NEXT: s_endpgm 3809; 3810; GFX7-LABEL: global_agent_one_as_acquire_load: 3811; GFX7: ; %bb.0: ; %entry 3812; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3813; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3814; GFX7-NEXT: v_mov_b32_e32 v0, s0 3815; GFX7-NEXT: v_mov_b32_e32 v1, s1 3816; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 3817; GFX7-NEXT: s_waitcnt vmcnt(0) 3818; GFX7-NEXT: buffer_wbinvl1_vol 3819; GFX7-NEXT: v_mov_b32_e32 v2, s2 3820; GFX7-NEXT: v_mov_b32_e32 v3, s3 3821; GFX7-NEXT: flat_store_dword v[2:3], v0 3822; GFX7-NEXT: s_endpgm 3823; 3824; GFX10-WGP-LABEL: global_agent_one_as_acquire_load: 3825; GFX10-WGP: ; %bb.0: ; %entry 3826; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3827; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3828; GFX10-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 3829; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3830; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3831; GFX10-WGP-NEXT: buffer_gl0_inv 3832; GFX10-WGP-NEXT: buffer_gl1_inv 3833; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3834; GFX10-WGP-NEXT: s_endpgm 3835; 3836; GFX10-CU-LABEL: global_agent_one_as_acquire_load: 3837; GFX10-CU: ; %bb.0: ; %entry 3838; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3839; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3840; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3841; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3842; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3843; GFX10-CU-NEXT: buffer_gl0_inv 3844; GFX10-CU-NEXT: buffer_gl1_inv 3845; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3846; GFX10-CU-NEXT: s_endpgm 3847; 3848; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_load: 3849; SKIP-CACHE-INV: ; %bb.0: ; %entry 3850; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3851; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3852; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3853; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3854; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3855; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3856; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3857; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3858; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3859; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3860; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3861; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3862; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3863; SKIP-CACHE-INV-NEXT: s_endpgm 3864; 3865; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_load: 3866; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3867; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3868; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3869; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3870; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3871; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3872; GFX90A-NOTTGSPLIT-NEXT: 
buffer_wbinvl1_vol 3873; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3874; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3875; 3876; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_load: 3877; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3878; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3879; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3880; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3881; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3882; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3883; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3884; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3885; GFX90A-TGSPLIT-NEXT: s_endpgm 3886 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3887entry: 3888 %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") acquire, align 4 3889 store i32 %val, i32 addrspace(1)* %out 3890 ret void 3891} 3892 3893define amdgpu_kernel void @global_agent_one_as_seq_cst_load( 3894; GFX6-LABEL: global_agent_one_as_seq_cst_load: 3895; GFX6: ; %bb.0: ; %entry 3896; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3897; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3898; GFX6-NEXT: s_mov_b32 s2, -1 3899; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3900; GFX6-NEXT: s_mov_b32 s0, s4 3901; GFX6-NEXT: s_mov_b32 s1, s5 3902; GFX6-NEXT: s_waitcnt vmcnt(0) 3903; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3904; GFX6-NEXT: s_waitcnt vmcnt(0) 3905; GFX6-NEXT: buffer_wbinvl1 3906; GFX6-NEXT: s_mov_b32 s4, s6 3907; GFX6-NEXT: s_mov_b32 s5, s7 3908; GFX6-NEXT: s_mov_b32 s6, s2 3909; GFX6-NEXT: s_mov_b32 s7, s3 3910; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3911; GFX6-NEXT: s_endpgm 3912; 3913; GFX7-LABEL: global_agent_one_as_seq_cst_load: 3914; GFX7: ; %bb.0: ; %entry 3915; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3916; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3917; GFX7-NEXT: v_mov_b32_e32 v0, s0 3918; GFX7-NEXT: v_mov_b32_e32 v1, s1 3919; GFX7-NEXT: s_waitcnt vmcnt(0) 3920; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 3921; GFX7-NEXT: s_waitcnt 
vmcnt(0) 3922; GFX7-NEXT: buffer_wbinvl1_vol 3923; GFX7-NEXT: v_mov_b32_e32 v2, s2 3924; GFX7-NEXT: v_mov_b32_e32 v3, s3 3925; GFX7-NEXT: flat_store_dword v[2:3], v0 3926; GFX7-NEXT: s_endpgm 3927; 3928; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_load: 3929; GFX10-WGP: ; %bb.0: ; %entry 3930; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3931; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3932; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3933; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3934; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3935; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3936; GFX10-WGP-NEXT: buffer_gl0_inv 3937; GFX10-WGP-NEXT: buffer_gl1_inv 3938; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3939; GFX10-WGP-NEXT: s_endpgm 3940; 3941; GFX10-CU-LABEL: global_agent_one_as_seq_cst_load: 3942; GFX10-CU: ; %bb.0: ; %entry 3943; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3944; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3945; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3946; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3947; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3948; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3949; GFX10-CU-NEXT: buffer_gl0_inv 3950; GFX10-CU-NEXT: buffer_gl1_inv 3951; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3952; GFX10-CU-NEXT: s_endpgm 3953; 3954; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_load: 3955; SKIP-CACHE-INV: ; %bb.0: ; %entry 3956; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3957; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3958; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3959; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3960; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3961; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3962; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3963; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3964; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3965; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3966; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3967; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3968; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3969; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3970; SKIP-CACHE-INV-NEXT: s_endpgm 3971; 3972; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_load: 3973; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3974; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3975; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3976; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3977; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3978; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3979; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3980; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3981; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3982; 3983; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load: 3984; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3985; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3986; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3987; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3988; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3989; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3990; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3991; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3992; GFX90A-TGSPLIT-NEXT: s_endpgm 3993 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3994entry: 3995 %val = load atomic i32, i32 addrspace(1)* %in syncscope("agent-one-as") seq_cst, align 4 3996 store i32 %val, i32 addrspace(1)* %out 3997 ret void 3998} 3999 4000define amdgpu_kernel void @global_agent_one_as_unordered_store( 4001; GFX6-LABEL: global_agent_one_as_unordered_store: 4002; GFX6: ; %bb.0: ; %entry 4003; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 4004; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4005; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4006; GFX6-NEXT: s_mov_b32 s2, -1 4007; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4008; GFX6-NEXT: v_mov_b32_e32 v0, s6 4009; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4010; GFX6-NEXT: s_endpgm 4011; 4012; GFX7-LABEL: global_agent_one_as_unordered_store: 
4013; GFX7: ; %bb.0: ; %entry 4014; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4015; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4016; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4017; GFX7-NEXT: v_mov_b32_e32 v2, s2 4018; GFX7-NEXT: v_mov_b32_e32 v0, s0 4019; GFX7-NEXT: v_mov_b32_e32 v1, s1 4020; GFX7-NEXT: flat_store_dword v[0:1], v2 4021; GFX7-NEXT: s_endpgm 4022; 4023; GFX10-WGP-LABEL: global_agent_one_as_unordered_store: 4024; GFX10-WGP: ; %bb.0: ; %entry 4025; GFX10-WGP-NEXT: s_clause 0x1 4026; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 4027; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4028; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4029; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4030; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4031; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4032; GFX10-WGP-NEXT: s_endpgm 4033; 4034; GFX10-CU-LABEL: global_agent_one_as_unordered_store: 4035; GFX10-CU: ; %bb.0: ; %entry 4036; GFX10-CU-NEXT: s_clause 0x1 4037; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4038; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4039; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4040; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4041; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4042; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4043; GFX10-CU-NEXT: s_endpgm 4044; 4045; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_store: 4046; SKIP-CACHE-INV: ; %bb.0: ; %entry 4047; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 4048; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4049; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4050; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4051; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4052; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4053; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4054; SKIP-CACHE-INV-NEXT: s_endpgm 4055; 4056; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_store: 4057; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4058; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4059; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x8 4060; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4061; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4062; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4063; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4064; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4065; 4066; GFX90A-TGSPLIT-LABEL: global_agent_one_as_unordered_store: 4067; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4068; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4069; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4070; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4071; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4072; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4073; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4074; GFX90A-TGSPLIT-NEXT: s_endpgm 4075 i32 %in, i32 addrspace(1)* %out) { 4076entry: 4077 store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent-one-as") unordered, align 4 4078 ret void 4079} 4080 4081define amdgpu_kernel void @global_agent_one_as_monotonic_store( 4082; GFX6-LABEL: global_agent_one_as_monotonic_store: 4083; GFX6: ; %bb.0: ; %entry 4084; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 4085; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4086; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4087; GFX6-NEXT: s_mov_b32 s2, -1 4088; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4089; GFX6-NEXT: v_mov_b32_e32 v0, s6 4090; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4091; GFX6-NEXT: s_endpgm 4092; 4093; GFX7-LABEL: global_agent_one_as_monotonic_store: 4094; GFX7: ; %bb.0: ; %entry 4095; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4096; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4097; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4098; GFX7-NEXT: v_mov_b32_e32 v2, s2 4099; GFX7-NEXT: v_mov_b32_e32 v0, s0 4100; GFX7-NEXT: v_mov_b32_e32 v1, s1 4101; GFX7-NEXT: flat_store_dword v[0:1], v2 4102; GFX7-NEXT: s_endpgm 4103; 4104; GFX10-WGP-LABEL: global_agent_one_as_monotonic_store: 4105; GFX10-WGP: ; %bb.0: ; %entry 4106; GFX10-WGP-NEXT: s_clause 0x1 4107; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 
4108; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4109; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4110; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4111; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4112; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4113; GFX10-WGP-NEXT: s_endpgm 4114; 4115; GFX10-CU-LABEL: global_agent_one_as_monotonic_store: 4116; GFX10-CU: ; %bb.0: ; %entry 4117; GFX10-CU-NEXT: s_clause 0x1 4118; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4119; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4120; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4121; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4122; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4123; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4124; GFX10-CU-NEXT: s_endpgm 4125; 4126; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_store: 4127; SKIP-CACHE-INV: ; %bb.0: ; %entry 4128; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 4129; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4130; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4131; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4132; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4133; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4134; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4135; SKIP-CACHE-INV-NEXT: s_endpgm 4136; 4137; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_store: 4138; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4139; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4140; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4141; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4142; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4143; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4144; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4145; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4146; 4147; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_store: 4148; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4149; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4150; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4151; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 4152; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4153; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4154; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4155; GFX90A-TGSPLIT-NEXT: s_endpgm 4156 i32 %in, i32 addrspace(1)* %out) { 4157entry: 4158 store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent-one-as") monotonic, align 4 4159 ret void 4160} 4161 4162define amdgpu_kernel void @global_agent_one_as_release_store( 4163; GFX6-LABEL: global_agent_one_as_release_store: 4164; GFX6: ; %bb.0: ; %entry 4165; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 4166; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4167; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4168; GFX6-NEXT: s_mov_b32 s2, -1 4169; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4170; GFX6-NEXT: v_mov_b32_e32 v0, s6 4171; GFX6-NEXT: s_waitcnt vmcnt(0) 4172; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4173; GFX6-NEXT: s_endpgm 4174; 4175; GFX7-LABEL: global_agent_one_as_release_store: 4176; GFX7: ; %bb.0: ; %entry 4177; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4178; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4179; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4180; GFX7-NEXT: v_mov_b32_e32 v2, s2 4181; GFX7-NEXT: v_mov_b32_e32 v0, s0 4182; GFX7-NEXT: v_mov_b32_e32 v1, s1 4183; GFX7-NEXT: s_waitcnt vmcnt(0) 4184; GFX7-NEXT: flat_store_dword v[0:1], v2 4185; GFX7-NEXT: s_endpgm 4186; 4187; GFX10-WGP-LABEL: global_agent_one_as_release_store: 4188; GFX10-WGP: ; %bb.0: ; %entry 4189; GFX10-WGP-NEXT: s_clause 0x1 4190; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 4191; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4192; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4193; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4194; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4195; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4196; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4197; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4198; GFX10-WGP-NEXT: s_endpgm 4199; 4200; GFX10-CU-LABEL: global_agent_one_as_release_store: 4201; GFX10-CU: ; %bb.0: ; %entry 4202; 
GFX10-CU-NEXT: s_clause 0x1 4203; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4204; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4205; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4206; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4207; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4208; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4209; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4210; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4211; GFX10-CU-NEXT: s_endpgm 4212; 4213; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_store: 4214; SKIP-CACHE-INV: ; %bb.0: ; %entry 4215; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 4216; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4217; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4218; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4219; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4220; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4221; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4222; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4223; SKIP-CACHE-INV-NEXT: s_endpgm 4224; 4225; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_store: 4226; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4227; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4228; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4229; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4230; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4231; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4232; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4233; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4234; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4235; 4236; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_store: 4237; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4238; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4239; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4240; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4241; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4242; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4243; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4244; GFX90A-TGSPLIT-NEXT: global_store_dword 
v0, v1, s[0:1] 4245; GFX90A-TGSPLIT-NEXT: s_endpgm 4246 i32 %in, i32 addrspace(1)* %out) { 4247entry: 4248 store atomic i32 %in, i32 addrspace(1)* %out syncscope("agent-one-as") release, align 4 4249 ret void 4250} 4251 4252define amdgpu_kernel void @global_agent_one_as_seq_cst_store( 4253; GFX6-LABEL: global_agent_one_as_seq_cst_store: 4254; GFX6: ; %bb.0: ; %entry 4255; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 4256; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4257; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4258; GFX6-NEXT: s_mov_b32 s2, -1 4259; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4260; GFX6-NEXT: v_mov_b32_e32 v0, s6 4261; GFX6-NEXT: s_waitcnt vmcnt(0) 4262; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4263; GFX6-NEXT: s_endpgm 4264; 4265; GFX7-LABEL: global_agent_one_as_seq_cst_store: 4266; GFX7: ; %bb.0: ; %entry 4267; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4268; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4269; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4270; GFX7-NEXT: v_mov_b32_e32 v2, s2 4271; GFX7-NEXT: v_mov_b32_e32 v0, s0 4272; GFX7-NEXT: v_mov_b32_e32 v1, s1 4273; GFX7-NEXT: s_waitcnt vmcnt(0) 4274; GFX7-NEXT: flat_store_dword v[0:1], v2 4275; GFX7-NEXT: s_endpgm 4276; 4277; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_store: 4278; GFX10-WGP: ; %bb.0: ; %entry 4279; GFX10-WGP-NEXT: s_clause 0x1 4280; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 4281; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4282; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4283; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4284; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4285; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4286; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4287; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4288; GFX10-WGP-NEXT: s_endpgm 4289; 4290; GFX10-CU-LABEL: global_agent_one_as_seq_cst_store: 4291; GFX10-CU: ; %bb.0: ; %entry 4292; GFX10-CU-NEXT: s_clause 0x1 4293; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4294; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4295; GFX10-CU-NEXT: v_mov_b32_e32 
v0, 0 4296; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4297; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4298; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4299; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4300; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4301; GFX10-CU-NEXT: s_endpgm 4302; 4303; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_store: 4304; SKIP-CACHE-INV: ; %bb.0: ; %entry 4305; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 4306; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4307; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4308; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4309; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4310; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4311; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4312; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4313; SKIP-CACHE-INV-NEXT: s_endpgm 4314; 4315; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_store: 4316; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4317; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4318; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4319; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4320; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4321; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4322; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4323; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4324; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4325; 4326; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store: 4327; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4328; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4329; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4330; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4331; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4332; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4333; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4334; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4335; GFX90A-TGSPLIT-NEXT: s_endpgm 4336 i32 %in, i32 addrspace(1)* %out) { 4337entry: 4338 store atomic i32 %in, i32 addrspace(1)* %out 
syncscope("agent-one-as") seq_cst, align 4 4339 ret void 4340} 4341 4342define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw( 4343; GFX6-LABEL: global_agent_one_as_monotonic_atomicrmw: 4344; GFX6: ; %bb.0: ; %entry 4345; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4346; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4347; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4348; GFX6-NEXT: s_mov_b32 s2, -1 4349; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4350; GFX6-NEXT: v_mov_b32_e32 v0, s4 4351; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4352; GFX6-NEXT: s_endpgm 4353; 4354; GFX7-LABEL: global_agent_one_as_monotonic_atomicrmw: 4355; GFX7: ; %bb.0: ; %entry 4356; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4357; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4358; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4359; GFX7-NEXT: v_mov_b32_e32 v0, s0 4360; GFX7-NEXT: v_mov_b32_e32 v1, s1 4361; GFX7-NEXT: v_mov_b32_e32 v2, s2 4362; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4363; GFX7-NEXT: s_endpgm 4364; 4365; GFX10-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw: 4366; GFX10-WGP: ; %bb.0: ; %entry 4367; GFX10-WGP-NEXT: s_clause 0x1 4368; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4369; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4370; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4371; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4372; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4373; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4374; GFX10-WGP-NEXT: s_endpgm 4375; 4376; GFX10-CU-LABEL: global_agent_one_as_monotonic_atomicrmw: 4377; GFX10-CU: ; %bb.0: ; %entry 4378; GFX10-CU-NEXT: s_clause 0x1 4379; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4380; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4381; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4382; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4383; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4384; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4385; GFX10-CU-NEXT: s_endpgm 4386; 4387; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_atomicrmw: 4388; SKIP-CACHE-INV: ; %bb.0: ; 
%entry 4389; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4390; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4391; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4392; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4393; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4394; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4395; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4396; SKIP-CACHE-INV-NEXT: s_endpgm 4397; 4398; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: 4399; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4400; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4401; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4402; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4403; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4404; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4405; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4406; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4407; 4408; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: 4409; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4410; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4411; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4412; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4413; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4414; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4415; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4416; GFX90A-TGSPLIT-NEXT: s_endpgm 4417 i32 addrspace(1)* %out, i32 %in) { 4418entry: 4419 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") monotonic 4420 ret void 4421} 4422 4423define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( 4424; GFX6-LABEL: global_agent_one_as_acquire_atomicrmw: 4425; GFX6: ; %bb.0: ; %entry 4426; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4427; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4428; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4429; GFX6-NEXT: s_mov_b32 s2, -1 4430; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4431; GFX6-NEXT: v_mov_b32_e32 v0, s4 4432; GFX6-NEXT: 
buffer_atomic_swap v0, off, s[0:3], 0 4433; GFX6-NEXT: s_waitcnt vmcnt(0) 4434; GFX6-NEXT: buffer_wbinvl1 4435; GFX6-NEXT: s_endpgm 4436; 4437; GFX7-LABEL: global_agent_one_as_acquire_atomicrmw: 4438; GFX7: ; %bb.0: ; %entry 4439; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4440; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4441; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4442; GFX7-NEXT: v_mov_b32_e32 v0, s0 4443; GFX7-NEXT: v_mov_b32_e32 v1, s1 4444; GFX7-NEXT: v_mov_b32_e32 v2, s2 4445; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4446; GFX7-NEXT: s_waitcnt vmcnt(0) 4447; GFX7-NEXT: buffer_wbinvl1_vol 4448; GFX7-NEXT: s_endpgm 4449; 4450; GFX10-WGP-LABEL: global_agent_one_as_acquire_atomicrmw: 4451; GFX10-WGP: ; %bb.0: ; %entry 4452; GFX10-WGP-NEXT: s_clause 0x1 4453; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4454; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4455; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4456; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4457; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4458; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4459; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4460; GFX10-WGP-NEXT: buffer_gl0_inv 4461; GFX10-WGP-NEXT: buffer_gl1_inv 4462; GFX10-WGP-NEXT: s_endpgm 4463; 4464; GFX10-CU-LABEL: global_agent_one_as_acquire_atomicrmw: 4465; GFX10-CU: ; %bb.0: ; %entry 4466; GFX10-CU-NEXT: s_clause 0x1 4467; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4468; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4469; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4470; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4471; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4472; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4473; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4474; GFX10-CU-NEXT: buffer_gl0_inv 4475; GFX10-CU-NEXT: buffer_gl1_inv 4476; GFX10-CU-NEXT: s_endpgm 4477; 4478; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_atomicrmw: 4479; SKIP-CACHE-INV: ; %bb.0: ; %entry 4480; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4481; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 
0xb 4482; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4483; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4484; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4485; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4486; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4487; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4488; SKIP-CACHE-INV-NEXT: s_endpgm 4489; 4490; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: 4491; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4492; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4493; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4494; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4495; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4496; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4497; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4498; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4499; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4500; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4501; 4502; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: 4503; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4504; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4505; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4506; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4507; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4508; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4509; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4510; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4511; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4512; GFX90A-TGSPLIT-NEXT: s_endpgm 4513 i32 addrspace(1)* %out, i32 %in) { 4514entry: 4515 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") acquire 4516 ret void 4517} 4518 4519define amdgpu_kernel void @global_agent_one_as_release_atomicrmw( 4520; GFX6-LABEL: global_agent_one_as_release_atomicrmw: 4521; GFX6: ; %bb.0: ; %entry 4522; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4523; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4524; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4525; GFX6-NEXT: s_mov_b32 
s2, -1 4526; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4527; GFX6-NEXT: v_mov_b32_e32 v0, s4 4528; GFX6-NEXT: s_waitcnt vmcnt(0) 4529; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4530; GFX6-NEXT: s_endpgm 4531; 4532; GFX7-LABEL: global_agent_one_as_release_atomicrmw: 4533; GFX7: ; %bb.0: ; %entry 4534; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4535; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4536; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4537; GFX7-NEXT: v_mov_b32_e32 v0, s0 4538; GFX7-NEXT: v_mov_b32_e32 v1, s1 4539; GFX7-NEXT: v_mov_b32_e32 v2, s2 4540; GFX7-NEXT: s_waitcnt vmcnt(0) 4541; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4542; GFX7-NEXT: s_endpgm 4543; 4544; GFX10-WGP-LABEL: global_agent_one_as_release_atomicrmw: 4545; GFX10-WGP: ; %bb.0: ; %entry 4546; GFX10-WGP-NEXT: s_clause 0x1 4547; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4548; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4549; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4550; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4551; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4552; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4553; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4554; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4555; GFX10-WGP-NEXT: s_endpgm 4556; 4557; GFX10-CU-LABEL: global_agent_one_as_release_atomicrmw: 4558; GFX10-CU: ; %bb.0: ; %entry 4559; GFX10-CU-NEXT: s_clause 0x1 4560; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4561; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4562; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4563; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4564; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4565; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4566; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4567; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4568; GFX10-CU-NEXT: s_endpgm 4569; 4570; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_atomicrmw: 4571; SKIP-CACHE-INV: ; %bb.0: ; %entry 4572; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4573; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4574; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s7, 0xf000 4575; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4576; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4577; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4578; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4579; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4580; SKIP-CACHE-INV-NEXT: s_endpgm 4581; 4582; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: 4583; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4584; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4585; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4586; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4587; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4588; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4589; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4590; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4591; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4592; 4593; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: 4594; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4595; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4596; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4597; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4598; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4599; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4600; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4601; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4602; GFX90A-TGSPLIT-NEXT: s_endpgm 4603 i32 addrspace(1)* %out, i32 %in) { 4604entry: 4605 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") release 4606 ret void 4607} 4608 4609define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( 4610; GFX6-LABEL: global_agent_one_as_acq_rel_atomicrmw: 4611; GFX6: ; %bb.0: ; %entry 4612; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4613; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4614; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4615; GFX6-NEXT: s_mov_b32 s2, -1 4616; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4617; GFX6-NEXT: v_mov_b32_e32 v0, s4 4618; GFX6-NEXT: s_waitcnt vmcnt(0) 4619; 
GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4620; GFX6-NEXT: s_waitcnt vmcnt(0) 4621; GFX6-NEXT: buffer_wbinvl1 4622; GFX6-NEXT: s_endpgm 4623; 4624; GFX7-LABEL: global_agent_one_as_acq_rel_atomicrmw: 4625; GFX7: ; %bb.0: ; %entry 4626; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4627; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4628; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4629; GFX7-NEXT: v_mov_b32_e32 v0, s0 4630; GFX7-NEXT: v_mov_b32_e32 v1, s1 4631; GFX7-NEXT: v_mov_b32_e32 v2, s2 4632; GFX7-NEXT: s_waitcnt vmcnt(0) 4633; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4634; GFX7-NEXT: s_waitcnt vmcnt(0) 4635; GFX7-NEXT: buffer_wbinvl1_vol 4636; GFX7-NEXT: s_endpgm 4637; 4638; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw: 4639; GFX10-WGP: ; %bb.0: ; %entry 4640; GFX10-WGP-NEXT: s_clause 0x1 4641; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4642; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4643; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4644; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4645; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4646; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4647; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4648; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4649; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4650; GFX10-WGP-NEXT: buffer_gl0_inv 4651; GFX10-WGP-NEXT: buffer_gl1_inv 4652; GFX10-WGP-NEXT: s_endpgm 4653; 4654; GFX10-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw: 4655; GFX10-CU: ; %bb.0: ; %entry 4656; GFX10-CU-NEXT: s_clause 0x1 4657; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4658; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4659; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4660; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4661; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4662; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4663; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4664; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4665; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4666; GFX10-CU-NEXT: buffer_gl0_inv 4667; GFX10-CU-NEXT: buffer_gl1_inv 4668; GFX10-CU-NEXT: s_endpgm 4669; 
4670; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_atomicrmw: 4671; SKIP-CACHE-INV: ; %bb.0: ; %entry 4672; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4673; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4674; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4675; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4676; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4677; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4678; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4679; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4680; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4681; SKIP-CACHE-INV-NEXT: s_endpgm 4682; 4683; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: 4684; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4685; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4686; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4687; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4688; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4689; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4690; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4691; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4692; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4693; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4694; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4695; 4696; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: 4697; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4698; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4699; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4700; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4701; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4702; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4703; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4704; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4705; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4706; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4707; GFX90A-TGSPLIT-NEXT: s_endpgm 4708 i32 addrspace(1)* %out, i32 %in) { 4709entry: 4710 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in 
syncscope("agent-one-as") acq_rel 4711 ret void 4712} 4713 4714define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( 4715; GFX6-LABEL: global_agent_one_as_seq_cst_atomicrmw: 4716; GFX6: ; %bb.0: ; %entry 4717; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4718; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4719; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4720; GFX6-NEXT: s_mov_b32 s2, -1 4721; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4722; GFX6-NEXT: v_mov_b32_e32 v0, s4 4723; GFX6-NEXT: s_waitcnt vmcnt(0) 4724; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4725; GFX6-NEXT: s_waitcnt vmcnt(0) 4726; GFX6-NEXT: buffer_wbinvl1 4727; GFX6-NEXT: s_endpgm 4728; 4729; GFX7-LABEL: global_agent_one_as_seq_cst_atomicrmw: 4730; GFX7: ; %bb.0: ; %entry 4731; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4732; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4733; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4734; GFX7-NEXT: v_mov_b32_e32 v0, s0 4735; GFX7-NEXT: v_mov_b32_e32 v1, s1 4736; GFX7-NEXT: v_mov_b32_e32 v2, s2 4737; GFX7-NEXT: s_waitcnt vmcnt(0) 4738; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4739; GFX7-NEXT: s_waitcnt vmcnt(0) 4740; GFX7-NEXT: buffer_wbinvl1_vol 4741; GFX7-NEXT: s_endpgm 4742; 4743; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_atomicrmw: 4744; GFX10-WGP: ; %bb.0: ; %entry 4745; GFX10-WGP-NEXT: s_clause 0x1 4746; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4747; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4748; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4749; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4750; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4751; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4752; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4753; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4754; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4755; GFX10-WGP-NEXT: buffer_gl0_inv 4756; GFX10-WGP-NEXT: buffer_gl1_inv 4757; GFX10-WGP-NEXT: s_endpgm 4758; 4759; GFX10-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw: 4760; GFX10-CU: ; %bb.0: ; %entry 4761; GFX10-CU-NEXT: s_clause 0x1 4762; GFX10-CU-NEXT: 
s_load_dword s2, s[4:5], 0x8 4763; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4764; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4765; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4766; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4767; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4768; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4769; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4770; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4771; GFX10-CU-NEXT: buffer_gl0_inv 4772; GFX10-CU-NEXT: buffer_gl1_inv 4773; GFX10-CU-NEXT: s_endpgm 4774; 4775; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_atomicrmw: 4776; SKIP-CACHE-INV: ; %bb.0: ; %entry 4777; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4778; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4779; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4780; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4781; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4782; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4783; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4784; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4785; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4786; SKIP-CACHE-INV-NEXT: s_endpgm 4787; 4788; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: 4789; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4790; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4791; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4792; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4793; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4794; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4795; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4796; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4797; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4798; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4799; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4800; 4801; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: 4802; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4803; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4804; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4805; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4806; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4807; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4808; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4809; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4810; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4811; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4812; GFX90A-TGSPLIT-NEXT: s_endpgm 4813 i32 addrspace(1)* %out, i32 %in) { 4814entry: 4815 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") seq_cst 4816 ret void 4817} 4818 4819define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( 4820; GFX6-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 4821; GFX6: ; %bb.0: ; %entry 4822; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4823; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4824; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4825; GFX6-NEXT: s_mov_b32 s2, -1 4826; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4827; GFX6-NEXT: v_mov_b32_e32 v0, s4 4828; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 4829; GFX6-NEXT: s_waitcnt vmcnt(0) 4830; GFX6-NEXT: buffer_wbinvl1 4831; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4832; GFX6-NEXT: s_endpgm 4833; 4834; GFX7-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 4835; GFX7: ; %bb.0: ; %entry 4836; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4837; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4838; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4839; GFX7-NEXT: v_mov_b32_e32 v0, s0 4840; GFX7-NEXT: v_mov_b32_e32 v1, s1 4841; GFX7-NEXT: v_mov_b32_e32 v2, s2 4842; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 4843; GFX7-NEXT: s_waitcnt vmcnt(0) 4844; GFX7-NEXT: buffer_wbinvl1_vol 4845; GFX7-NEXT: flat_store_dword v[0:1], v2 4846; GFX7-NEXT: s_endpgm 4847; 4848; GFX10-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 4849; GFX10-WGP: ; %bb.0: ; %entry 4850; GFX10-WGP-NEXT: s_clause 0x1 4851; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4852; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4853; GFX10-WGP-NEXT: 
v_mov_b32_e32 v0, 0 4854; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4855; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4856; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4857; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4858; GFX10-WGP-NEXT: buffer_gl0_inv 4859; GFX10-WGP-NEXT: buffer_gl1_inv 4860; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4861; GFX10-WGP-NEXT: s_endpgm 4862; 4863; GFX10-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 4864; GFX10-CU: ; %bb.0: ; %entry 4865; GFX10-CU-NEXT: s_clause 0x1 4866; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4867; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4868; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4869; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4870; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4871; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4872; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4873; GFX10-CU-NEXT: buffer_gl0_inv 4874; GFX10-CU-NEXT: buffer_gl1_inv 4875; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4876; GFX10-CU-NEXT: s_endpgm 4877; 4878; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 4879; SKIP-CACHE-INV: ; %bb.0: ; %entry 4880; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4881; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4882; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4883; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4884; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4885; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4886; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 4887; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4888; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 4889; SKIP-CACHE-INV-NEXT: s_endpgm 4890; 4891; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 4892; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4893; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4894; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4895; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4896; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4897; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4898; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4899; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4900; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4901; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4902; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4903; 4904; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: 4905; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4906; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4907; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4908; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4909; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4910; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4911; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4912; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4913; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4914; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4915; GFX90A-TGSPLIT-NEXT: s_endpgm 4916 i32 addrspace(1)* %out, i32 %in) { 4917entry: 4918 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") acquire 4919 store i32 %val, i32 addrspace(1)* %out, align 4 4920 ret void 4921} 4922 4923define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( 4924; GFX6-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 4925; GFX6: ; %bb.0: ; %entry 4926; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4927; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4928; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4929; GFX6-NEXT: s_mov_b32 s2, -1 4930; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4931; GFX6-NEXT: v_mov_b32_e32 v0, s4 4932; GFX6-NEXT: s_waitcnt vmcnt(0) 4933; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 4934; GFX6-NEXT: s_waitcnt vmcnt(0) 4935; GFX6-NEXT: buffer_wbinvl1 4936; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4937; GFX6-NEXT: s_endpgm 4938; 4939; GFX7-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 4940; GFX7: ; %bb.0: ; %entry 4941; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4942; 
GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4943; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4944; GFX7-NEXT: v_mov_b32_e32 v0, s0 4945; GFX7-NEXT: v_mov_b32_e32 v1, s1 4946; GFX7-NEXT: v_mov_b32_e32 v2, s2 4947; GFX7-NEXT: s_waitcnt vmcnt(0) 4948; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 4949; GFX7-NEXT: s_waitcnt vmcnt(0) 4950; GFX7-NEXT: buffer_wbinvl1_vol 4951; GFX7-NEXT: flat_store_dword v[0:1], v2 4952; GFX7-NEXT: s_endpgm 4953; 4954; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 4955; GFX10-WGP: ; %bb.0: ; %entry 4956; GFX10-WGP-NEXT: s_clause 0x1 4957; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4958; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4959; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4960; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4961; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4962; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4963; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4964; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4965; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4966; GFX10-WGP-NEXT: buffer_gl0_inv 4967; GFX10-WGP-NEXT: buffer_gl1_inv 4968; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4969; GFX10-WGP-NEXT: s_endpgm 4970; 4971; GFX10-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 4972; GFX10-CU: ; %bb.0: ; %entry 4973; GFX10-CU-NEXT: s_clause 0x1 4974; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4975; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4976; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4977; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4978; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4979; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4980; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4981; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4982; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4983; GFX10-CU-NEXT: buffer_gl0_inv 4984; GFX10-CU-NEXT: buffer_gl1_inv 4985; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4986; GFX10-CU-NEXT: s_endpgm 4987; 4988; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 4989; SKIP-CACHE-INV: ; %bb.0: ; %entry 4990; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4991; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4992; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4993; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4994; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4995; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4996; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4997; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 4998; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4999; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5000; SKIP-CACHE-INV-NEXT: s_endpgm 5001; 5002; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 5003; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5004; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5005; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5006; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5007; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5008; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5009; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5010; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5011; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5012; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5013; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5014; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5015; 5016; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: 5017; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5018; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5019; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5020; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5021; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5022; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5023; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5024; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5025; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5026; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5027; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5028; GFX90A-TGSPLIT-NEXT: s_endpgm 5029 i32 addrspace(1)* %out, i32 
%in) { 5030entry: 5031 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") acq_rel 5032 store i32 %val, i32 addrspace(1)* %out, align 4 5033 ret void 5034} 5035 5036define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( 5037; GFX6-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 5038; GFX6: ; %bb.0: ; %entry 5039; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5040; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 5041; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5042; GFX6-NEXT: s_mov_b32 s2, -1 5043; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5044; GFX6-NEXT: v_mov_b32_e32 v0, s4 5045; GFX6-NEXT: s_waitcnt vmcnt(0) 5046; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 5047; GFX6-NEXT: s_waitcnt vmcnt(0) 5048; GFX6-NEXT: buffer_wbinvl1 5049; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 5050; GFX6-NEXT: s_endpgm 5051; 5052; GFX7-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 5053; GFX7: ; %bb.0: ; %entry 5054; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5055; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 5056; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5057; GFX7-NEXT: v_mov_b32_e32 v0, s0 5058; GFX7-NEXT: v_mov_b32_e32 v1, s1 5059; GFX7-NEXT: v_mov_b32_e32 v2, s2 5060; GFX7-NEXT: s_waitcnt vmcnt(0) 5061; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 5062; GFX7-NEXT: s_waitcnt vmcnt(0) 5063; GFX7-NEXT: buffer_wbinvl1_vol 5064; GFX7-NEXT: flat_store_dword v[0:1], v2 5065; GFX7-NEXT: s_endpgm 5066; 5067; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 5068; GFX10-WGP: ; %bb.0: ; %entry 5069; GFX10-WGP-NEXT: s_clause 0x1 5070; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 5071; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5072; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5073; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5074; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 5075; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5076; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5077; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5078; GFX10-WGP-NEXT: s_waitcnt 
vmcnt(0) 5079; GFX10-WGP-NEXT: buffer_gl0_inv 5080; GFX10-WGP-NEXT: buffer_gl1_inv 5081; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 5082; GFX10-WGP-NEXT: s_endpgm 5083; 5084; GFX10-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 5085; GFX10-CU: ; %bb.0: ; %entry 5086; GFX10-CU-NEXT: s_clause 0x1 5087; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 5088; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5089; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5090; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5091; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 5092; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5093; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5094; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5095; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5096; GFX10-CU-NEXT: buffer_gl0_inv 5097; GFX10-CU-NEXT: buffer_gl1_inv 5098; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 5099; GFX10-CU-NEXT: s_endpgm 5100; 5101; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 5102; SKIP-CACHE-INV: ; %bb.0: ; %entry 5103; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5104; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 5105; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5106; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5107; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5108; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5109; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5110; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 5111; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5112; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5113; SKIP-CACHE-INV-NEXT: s_endpgm 5114; 5115; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 5116; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5117; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5118; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5119; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5120; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5121; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5122; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5123; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5124; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5125; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5126; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5127; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5128; 5129; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: 5130; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5131; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5132; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5133; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5134; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5135; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5136; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5137; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5138; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5139; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5140; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5141; GFX90A-TGSPLIT-NEXT: s_endpgm 5142 i32 addrspace(1)* %out, i32 %in) { 5143entry: 5144 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("agent-one-as") seq_cst 5145 store i32 %val, i32 addrspace(1)* %out, align 4 5146 ret void 5147} 5148 5149define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg( 5150; GFX6-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 5151; GFX6: ; %bb.0: ; %entry 5152; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5153; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5154; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5155; GFX6-NEXT: s_mov_b32 s2, -1 5156; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5157; GFX6-NEXT: v_mov_b32_e32 v0, s4 5158; GFX6-NEXT: v_mov_b32_e32 v1, s5 5159; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5160; GFX6-NEXT: s_endpgm 5161; 5162; GFX7-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 5163; GFX7: ; %bb.0: ; %entry 5164; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5165; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 
0x2 5166; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5167; GFX7-NEXT: s_add_u32 s0, s0, 16 5168; GFX7-NEXT: s_addc_u32 s1, s1, 0 5169; GFX7-NEXT: v_mov_b32_e32 v0, s0 5170; GFX7-NEXT: v_mov_b32_e32 v2, s2 5171; GFX7-NEXT: v_mov_b32_e32 v1, s1 5172; GFX7-NEXT: v_mov_b32_e32 v3, s3 5173; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5174; GFX7-NEXT: s_endpgm 5175; 5176; GFX10-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 5177; GFX10-WGP: ; %bb.0: ; %entry 5178; GFX10-WGP-NEXT: s_clause 0x1 5179; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5180; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5181; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5182; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5183; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5184; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5185; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5186; GFX10-WGP-NEXT: s_endpgm 5187; 5188; GFX10-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 5189; GFX10-CU: ; %bb.0: ; %entry 5190; GFX10-CU-NEXT: s_clause 0x1 5191; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5192; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5193; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5194; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5195; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5196; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5197; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5198; GFX10-CU-NEXT: s_endpgm 5199; 5200; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 5201; SKIP-CACHE-INV: ; %bb.0: ; %entry 5202; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5203; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5204; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5205; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5206; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5207; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5208; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5209; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5210; 
SKIP-CACHE-INV-NEXT: s_endpgm 5211; 5212; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 5213; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5214; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5215; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5216; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5217; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5218; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5219; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5220; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5221; 5222; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: 5223; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5224; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5225; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5226; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5227; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5228; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5229; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5230; GFX90A-TGSPLIT-NEXT: s_endpgm 5231 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5232entry: 5233 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5234 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic 5235 ret void 5236} 5237 5238define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( 5239; GFX6-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 5240; GFX6: ; %bb.0: ; %entry 5241; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5242; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5243; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5244; GFX6-NEXT: s_mov_b32 s2, -1 5245; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5246; GFX6-NEXT: v_mov_b32_e32 v0, s4 5247; GFX6-NEXT: v_mov_b32_e32 v1, s5 5248; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5249; GFX6-NEXT: s_waitcnt vmcnt(0) 5250; GFX6-NEXT: buffer_wbinvl1 5251; 
GFX6-NEXT: s_endpgm 5252; 5253; GFX7-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 5254; GFX7: ; %bb.0: ; %entry 5255; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5256; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5257; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5258; GFX7-NEXT: s_add_u32 s0, s0, 16 5259; GFX7-NEXT: s_addc_u32 s1, s1, 0 5260; GFX7-NEXT: v_mov_b32_e32 v0, s0 5261; GFX7-NEXT: v_mov_b32_e32 v2, s2 5262; GFX7-NEXT: v_mov_b32_e32 v1, s1 5263; GFX7-NEXT: v_mov_b32_e32 v3, s3 5264; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5265; GFX7-NEXT: s_waitcnt vmcnt(0) 5266; GFX7-NEXT: buffer_wbinvl1_vol 5267; GFX7-NEXT: s_endpgm 5268; 5269; GFX10-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 5270; GFX10-WGP: ; %bb.0: ; %entry 5271; GFX10-WGP-NEXT: s_clause 0x1 5272; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5273; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5274; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5275; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5276; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5277; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5278; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5279; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5280; GFX10-WGP-NEXT: buffer_gl0_inv 5281; GFX10-WGP-NEXT: buffer_gl1_inv 5282; GFX10-WGP-NEXT: s_endpgm 5283; 5284; GFX10-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 5285; GFX10-CU: ; %bb.0: ; %entry 5286; GFX10-CU-NEXT: s_clause 0x1 5287; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5288; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5289; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5290; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5291; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5292; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5293; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5294; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5295; GFX10-CU-NEXT: buffer_gl0_inv 5296; GFX10-CU-NEXT: buffer_gl1_inv 5297; GFX10-CU-NEXT: s_endpgm 5298; 5299; SKIP-CACHE-INV-LABEL: 
global_agent_one_as_acquire_monotonic_cmpxchg: 5300; SKIP-CACHE-INV: ; %bb.0: ; %entry 5301; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5302; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5303; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5304; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5305; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5306; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5307; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5308; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5309; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5310; SKIP-CACHE-INV-NEXT: s_endpgm 5311; 5312; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 5313; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5314; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5315; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5316; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5317; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5318; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5319; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5320; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5321; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5322; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5323; 5324; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: 5325; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5326; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5327; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5328; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5329; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5330; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5331; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5332; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5333; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5334; GFX90A-TGSPLIT-NEXT: s_endpgm 5335 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5336entry: 5337 %gep = getelementptr i32, i32 addrspace(1)* 
%out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
  ret void
}

; Kernel under test: a volatile cmpxchg on an addrspace(1) i32 at
; syncscope("agent-one-as"), success ordering `release`, failure ordering
; `monotonic`. The per-prefix expectation lines inside the definition are
; autogenerated (see the note on the first line of the file); refresh them
; with utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 pointer is a 16-byte displacement, matching the
  ; `offset:16` (or the add of 16 to the base address) in the expected code.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  ; %val is not used afterwards; the kernel returns right after the cmpxchg.
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
  ret void
}

; Kernel under test: same volatile cmpxchg shape as the preceding kernel, at
; syncscope("agent-one-as") with success ordering `acq_rel` and failure
; ordering `monotonic`. The expectation lines inside the definition are
; autogenerated; refresh them with utils/update_llc_test_checks.py.
define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
5472; 5473; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 5474; GFX10-WGP: ; %bb.0: ; %entry 5475; GFX10-WGP-NEXT: s_clause 0x1 5476; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5477; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5478; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5479; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5480; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5481; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5482; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5483; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5484; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5485; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5486; GFX10-WGP-NEXT: buffer_gl0_inv 5487; GFX10-WGP-NEXT: buffer_gl1_inv 5488; GFX10-WGP-NEXT: s_endpgm 5489; 5490; GFX10-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 5491; GFX10-CU: ; %bb.0: ; %entry 5492; GFX10-CU-NEXT: s_clause 0x1 5493; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5494; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5495; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5496; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5497; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5498; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5499; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5500; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5501; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5502; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5503; GFX10-CU-NEXT: buffer_gl0_inv 5504; GFX10-CU-NEXT: buffer_gl1_inv 5505; GFX10-CU-NEXT: s_endpgm 5506; 5507; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: 5508; SKIP-CACHE-INV: ; %bb.0: ; %entry 5509; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5510; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5511; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5512; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5513; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5514; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5515; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5516; SKIP-CACHE-INV-NEXT: s_waitcnt 
vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 pointer is a 16-byte displacement, matching the
  ; `offset:16` (or the add of 16 to the base address) in the expected code.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  ; %val is not used afterwards; the kernel returns right after the cmpxchg.
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
  ret void
}

; Kernel under test: a volatile cmpxchg on an addrspace(1) i32 at
; syncscope("agent-one-as"), success ordering `seq_cst`, failure ordering
; `monotonic`. The per-prefix expectation lines inside the definition are
; autogenerated (see the note on the first line of the file); refresh them
; with utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 pointer is a 16-byte displacement, matching the
  ; `offset:16` (or the add of 16 to the base address) in the expected code.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  ; %val is not used afterwards; the kernel returns right after the cmpxchg.
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
  ret void
}

; Kernel under test: same volatile cmpxchg shape as the preceding kernel, at
; syncscope("agent-one-as") with success ordering `acquire` and failure
; ordering `acquire`. The expectation lines inside the definition are
; autogenerated; refresh them with utils/update_llc_test_checks.py.
define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
; GFX6-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
;
GFX7-NEXT: v_mov_b32_e32 v1, s1 5691; GFX7-NEXT: v_mov_b32_e32 v3, s3 5692; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5693; GFX7-NEXT: s_waitcnt vmcnt(0) 5694; GFX7-NEXT: buffer_wbinvl1_vol 5695; GFX7-NEXT: s_endpgm 5696; 5697; GFX10-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 5698; GFX10-WGP: ; %bb.0: ; %entry 5699; GFX10-WGP-NEXT: s_clause 0x1 5700; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5701; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5702; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5703; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5704; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5705; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5706; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5707; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5708; GFX10-WGP-NEXT: buffer_gl0_inv 5709; GFX10-WGP-NEXT: buffer_gl1_inv 5710; GFX10-WGP-NEXT: s_endpgm 5711; 5712; GFX10-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 5713; GFX10-CU: ; %bb.0: ; %entry 5714; GFX10-CU-NEXT: s_clause 0x1 5715; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5716; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5717; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5718; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5719; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5720; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5721; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5722; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5723; GFX10-CU-NEXT: buffer_gl0_inv 5724; GFX10-CU-NEXT: buffer_gl1_inv 5725; GFX10-CU-NEXT: s_endpgm 5726; 5727; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: 5728; SKIP-CACHE-INV: ; %bb.0: ; %entry 5729; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5730; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5731; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5732; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5733; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5734; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5735; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 pointer is a 16-byte displacement, matching the
  ; `offset:16` (or the add of 16 to the base address) in the expected code.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  ; %val is not used afterwards; the kernel returns right after the cmpxchg.
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
  ret void
}

; Kernel under test: a volatile cmpxchg on an addrspace(1) i32 at
; syncscope("agent-one-as"), success ordering `release`, failure ordering
; `acquire`. The per-prefix expectation lines inside the definition are
; autogenerated (see the note on the first line of the file); refresh them
; with utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
; GFX6-LABEL: global_agent_one_as_release_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_release_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 pointer is a 16-byte displacement, matching the
  ; `offset:16` (or the add of 16 to the base address) in the expected code.
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  ; %val is not used afterwards; the kernel returns right after the cmpxchg.
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
  ret void
}

; Kernel under test: same volatile cmpxchg shape as the preceding kernel, at
; syncscope("agent-one-as") with success ordering `acq_rel` and failure
; ordering `acquire`. The expectation lines inside the definition are
; autogenerated; refresh them with utils/update_llc_test_checks.py.
define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s0, s0, 16
; GFX7-NEXT: s_addc_u32 s1, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT:
flat_atomic_cmpswap v[0:1], v[2:3] 5912; GFX7-NEXT: s_waitcnt vmcnt(0) 5913; GFX7-NEXT: buffer_wbinvl1_vol 5914; GFX7-NEXT: s_endpgm 5915; 5916; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 5917; GFX10-WGP: ; %bb.0: ; %entry 5918; GFX10-WGP-NEXT: s_clause 0x1 5919; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5920; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5921; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5922; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5923; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5924; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5925; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5926; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5927; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5928; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5929; GFX10-WGP-NEXT: buffer_gl0_inv 5930; GFX10-WGP-NEXT: buffer_gl1_inv 5931; GFX10-WGP-NEXT: s_endpgm 5932; 5933; GFX10-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 5934; GFX10-CU: ; %bb.0: ; %entry 5935; GFX10-CU-NEXT: s_clause 0x1 5936; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5937; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5938; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5939; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5940; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5941; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5942; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5943; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5944; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5945; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5946; GFX10-CU-NEXT: buffer_gl0_inv 5947; GFX10-CU-NEXT: buffer_gl1_inv 5948; GFX10-CU-NEXT: s_endpgm 5949; 5950; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 5951; SKIP-CACHE-INV: ; %bb.0: ; %entry 5952; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5953; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5954; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5955; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5956; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5957; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5958; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5959; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5960; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5961; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5962; SKIP-CACHE-INV-NEXT: s_endpgm 5963; 5964; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 5965; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5966; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5967; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5968; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5969; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5970; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5971; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5972; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5973; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5974; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5975; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5976; 5977; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: 5978; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5979; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5980; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5981; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5982; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5983; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5984; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5985; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5986; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5987; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5988; GFX90A-TGSPLIT-NEXT: s_endpgm 5989 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5990entry: 5991 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5992 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire 5993 ret void 5994} 5995 5996define amdgpu_kernel void 
@global_agent_one_as_seq_cst_acquire_cmpxchg( 5997; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 5998; GFX6: ; %bb.0: ; %entry 5999; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6000; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6001; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6002; GFX6-NEXT: s_mov_b32 s2, -1 6003; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6004; GFX6-NEXT: v_mov_b32_e32 v0, s4 6005; GFX6-NEXT: v_mov_b32_e32 v1, s5 6006; GFX6-NEXT: s_waitcnt vmcnt(0) 6007; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6008; GFX6-NEXT: s_waitcnt vmcnt(0) 6009; GFX6-NEXT: buffer_wbinvl1 6010; GFX6-NEXT: s_endpgm 6011; 6012; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 6013; GFX7: ; %bb.0: ; %entry 6014; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6015; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6016; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6017; GFX7-NEXT: s_add_u32 s0, s0, 16 6018; GFX7-NEXT: s_addc_u32 s1, s1, 0 6019; GFX7-NEXT: v_mov_b32_e32 v0, s0 6020; GFX7-NEXT: v_mov_b32_e32 v2, s2 6021; GFX7-NEXT: v_mov_b32_e32 v1, s1 6022; GFX7-NEXT: v_mov_b32_e32 v3, s3 6023; GFX7-NEXT: s_waitcnt vmcnt(0) 6024; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6025; GFX7-NEXT: s_waitcnt vmcnt(0) 6026; GFX7-NEXT: buffer_wbinvl1_vol 6027; GFX7-NEXT: s_endpgm 6028; 6029; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 6030; GFX10-WGP: ; %bb.0: ; %entry 6031; GFX10-WGP-NEXT: s_clause 0x1 6032; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6033; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6034; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6035; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6036; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6037; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6038; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6039; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6040; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6041; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6042; GFX10-WGP-NEXT: buffer_gl0_inv 6043; GFX10-WGP-NEXT: 
buffer_gl1_inv 6044; GFX10-WGP-NEXT: s_endpgm 6045; 6046; GFX10-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 6047; GFX10-CU: ; %bb.0: ; %entry 6048; GFX10-CU-NEXT: s_clause 0x1 6049; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6050; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6051; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6052; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6053; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6054; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6055; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6056; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6057; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6058; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6059; GFX10-CU-NEXT: buffer_gl0_inv 6060; GFX10-CU-NEXT: buffer_gl1_inv 6061; GFX10-CU-NEXT: s_endpgm 6062; 6063; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 6064; SKIP-CACHE-INV: ; %bb.0: ; %entry 6065; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6066; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6067; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6068; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6069; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6070; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6071; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6072; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6073; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6074; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6075; SKIP-CACHE-INV-NEXT: s_endpgm 6076; 6077; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 6078; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6079; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6080; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6081; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6082; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6083; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6084; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6085; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap 
v2, v[0:1], s[0:1] offset:16 6086; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6087; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6088; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6089; 6090; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: 6091; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6092; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6093; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6094; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6095; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6096; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6097; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6098; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6099; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6100; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6101; GFX90A-TGSPLIT-NEXT: s_endpgm 6102 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6103entry: 6104 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6105 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire 6106 ret void 6107} 6108 6109define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( 6110; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: 6111; GFX6: ; %bb.0: ; %entry 6112; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6113; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6114; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6115; GFX6-NEXT: s_mov_b32 s2, -1 6116; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6117; GFX6-NEXT: v_mov_b32_e32 v0, s4 6118; GFX6-NEXT: v_mov_b32_e32 v1, s5 6119; GFX6-NEXT: s_waitcnt vmcnt(0) 6120; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6121; GFX6-NEXT: s_waitcnt vmcnt(0) 6122; GFX6-NEXT: buffer_wbinvl1 6123; GFX6-NEXT: s_endpgm 6124; 6125; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: 6126; GFX7: ; %bb.0: ; %entry 6127; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6128; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6129; GFX7-NEXT: s_waitcnt lgkmcnt(0) 
6130; GFX7-NEXT: s_add_u32 s0, s0, 16 6131; GFX7-NEXT: s_addc_u32 s1, s1, 0 6132; GFX7-NEXT: v_mov_b32_e32 v0, s0 6133; GFX7-NEXT: v_mov_b32_e32 v2, s2 6134; GFX7-NEXT: v_mov_b32_e32 v1, s1 6135; GFX7-NEXT: v_mov_b32_e32 v3, s3 6136; GFX7-NEXT: s_waitcnt vmcnt(0) 6137; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6138; GFX7-NEXT: s_waitcnt vmcnt(0) 6139; GFX7-NEXT: buffer_wbinvl1_vol 6140; GFX7-NEXT: s_endpgm 6141; 6142; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: 6143; GFX10-WGP: ; %bb.0: ; %entry 6144; GFX10-WGP-NEXT: s_clause 0x1 6145; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6146; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6147; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6148; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6149; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6150; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6151; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6152; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6153; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6154; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6155; GFX10-WGP-NEXT: buffer_gl0_inv 6156; GFX10-WGP-NEXT: buffer_gl1_inv 6157; GFX10-WGP-NEXT: s_endpgm 6158; 6159; GFX10-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: 6160; GFX10-CU: ; %bb.0: ; %entry 6161; GFX10-CU-NEXT: s_clause 0x1 6162; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6163; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6164; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6165; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6166; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6167; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6168; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6169; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6170; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6171; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6172; GFX10-CU-NEXT: buffer_gl0_inv 6173; GFX10-CU-NEXT: buffer_gl1_inv 6174; GFX10-CU-NEXT: s_endpgm 6175; 6176; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: 6177; SKIP-CACHE-INV: ; %bb.0: ; 
%entry 6178; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6179; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6180; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6181; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6182; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6183; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6184; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6185; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6186; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6187; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6188; SKIP-CACHE-INV-NEXT: s_endpgm 6189; 6190; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: 6191; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6192; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6193; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6194; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6195; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6196; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6197; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6198; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6199; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6200; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6201; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6202; 6203; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: 6204; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6205; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6206; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6207; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6208; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6209; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6210; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6211; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6212; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6213; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6214; GFX90A-TGSPLIT-NEXT: s_endpgm 6215 i32 addrspace(1)* %out, i32 %in, i32 %old) { 
6216entry: 6217 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6218 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst 6219 ret void 6220} 6221 6222define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( 6223; GFX6-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 6224; GFX6: ; %bb.0: ; %entry 6225; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6226; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6227; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6228; GFX6-NEXT: s_mov_b32 s2, -1 6229; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6230; GFX6-NEXT: v_mov_b32_e32 v0, s4 6231; GFX6-NEXT: v_mov_b32_e32 v1, s5 6232; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6233; GFX6-NEXT: s_waitcnt vmcnt(0) 6234; GFX6-NEXT: buffer_wbinvl1 6235; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 6236; GFX6-NEXT: s_endpgm 6237; 6238; GFX7-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 6239; GFX7: ; %bb.0: ; %entry 6240; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6241; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6242; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6243; GFX7-NEXT: s_add_u32 s4, s0, 16 6244; GFX7-NEXT: s_addc_u32 s5, s1, 0 6245; GFX7-NEXT: v_mov_b32_e32 v0, s4 6246; GFX7-NEXT: v_mov_b32_e32 v2, s2 6247; GFX7-NEXT: v_mov_b32_e32 v1, s5 6248; GFX7-NEXT: v_mov_b32_e32 v3, s3 6249; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6250; GFX7-NEXT: s_waitcnt vmcnt(0) 6251; GFX7-NEXT: buffer_wbinvl1_vol 6252; GFX7-NEXT: v_mov_b32_e32 v0, s0 6253; GFX7-NEXT: v_mov_b32_e32 v1, s1 6254; GFX7-NEXT: flat_store_dword v[0:1], v2 6255; GFX7-NEXT: s_endpgm 6256; 6257; GFX10-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 6258; GFX10-WGP: ; %bb.0: ; %entry 6259; GFX10-WGP-NEXT: s_clause 0x1 6260; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6261; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6262; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6263; GFX10-WGP-NEXT: 
s_waitcnt lgkmcnt(0) 6264; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6265; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6266; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6267; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6268; GFX10-WGP-NEXT: buffer_gl0_inv 6269; GFX10-WGP-NEXT: buffer_gl1_inv 6270; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6271; GFX10-WGP-NEXT: s_endpgm 6272; 6273; GFX10-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 6274; GFX10-CU: ; %bb.0: ; %entry 6275; GFX10-CU-NEXT: s_clause 0x1 6276; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6277; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6278; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6279; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6280; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6281; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6282; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6283; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6284; GFX10-CU-NEXT: buffer_gl0_inv 6285; GFX10-CU-NEXT: buffer_gl1_inv 6286; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6287; GFX10-CU-NEXT: s_endpgm 6288; 6289; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 6290; SKIP-CACHE-INV: ; %bb.0: ; %entry 6291; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6292; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6293; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6294; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6295; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6296; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6297; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6298; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6299; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6300; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6301; SKIP-CACHE-INV-NEXT: s_endpgm 6302; 6303; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 6304; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6305; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 6306; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6307; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6308; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6309; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6310; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6311; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6312; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6313; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6314; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6315; 6316; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: 6317; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6318; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6319; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6320; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6321; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6322; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6323; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6324; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6325; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6326; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6327; GFX90A-TGSPLIT-NEXT: s_endpgm 6328 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6329entry: 6330 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6331 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic 6332 %val0 = extractvalue { i32, i1 } %val, 0 6333 store i32 %val0, i32 addrspace(1)* %out, align 4 6334 ret void 6335} 6336 6337define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( 6338; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 6339; GFX6: ; %bb.0: ; %entry 6340; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6341; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6342; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6343; GFX6-NEXT: s_mov_b32 s2, -1 6344; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) 6345; GFX6-NEXT: v_mov_b32_e32 v0, s4 6346; GFX6-NEXT: v_mov_b32_e32 v1, s5 6347; GFX6-NEXT: s_waitcnt vmcnt(0) 6348; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6349; GFX6-NEXT: s_waitcnt vmcnt(0) 6350; GFX6-NEXT: buffer_wbinvl1 6351; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 6352; GFX6-NEXT: s_endpgm 6353; 6354; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 6355; GFX7: ; %bb.0: ; %entry 6356; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6357; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6358; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6359; GFX7-NEXT: s_add_u32 s4, s0, 16 6360; GFX7-NEXT: s_addc_u32 s5, s1, 0 6361; GFX7-NEXT: v_mov_b32_e32 v0, s4 6362; GFX7-NEXT: v_mov_b32_e32 v2, s2 6363; GFX7-NEXT: v_mov_b32_e32 v1, s5 6364; GFX7-NEXT: v_mov_b32_e32 v3, s3 6365; GFX7-NEXT: s_waitcnt vmcnt(0) 6366; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6367; GFX7-NEXT: s_waitcnt vmcnt(0) 6368; GFX7-NEXT: buffer_wbinvl1_vol 6369; GFX7-NEXT: v_mov_b32_e32 v0, s0 6370; GFX7-NEXT: v_mov_b32_e32 v1, s1 6371; GFX7-NEXT: flat_store_dword v[0:1], v2 6372; GFX7-NEXT: s_endpgm 6373; 6374; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 6375; GFX10-WGP: ; %bb.0: ; %entry 6376; GFX10-WGP-NEXT: s_clause 0x1 6377; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6378; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6379; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6380; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6381; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6382; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6383; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6384; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6385; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6386; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6387; GFX10-WGP-NEXT: buffer_gl0_inv 6388; GFX10-WGP-NEXT: buffer_gl1_inv 6389; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6390; GFX10-WGP-NEXT: s_endpgm 6391; 6392; GFX10-CU-LABEL: 
global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 6393; GFX10-CU: ; %bb.0: ; %entry 6394; GFX10-CU-NEXT: s_clause 0x1 6395; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6396; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6397; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6398; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6399; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6400; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6401; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6402; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6403; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6404; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6405; GFX10-CU-NEXT: buffer_gl0_inv 6406; GFX10-CU-NEXT: buffer_gl1_inv 6407; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6408; GFX10-CU-NEXT: s_endpgm 6409; 6410; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 6411; SKIP-CACHE-INV: ; %bb.0: ; %entry 6412; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6413; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6414; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6415; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6416; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6417; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6418; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6419; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6420; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6421; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6422; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6423; SKIP-CACHE-INV-NEXT: s_endpgm 6424; 6425; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 6426; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6427; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6428; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6429; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6430; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6431; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6432; GFX90A-NOTTGSPLIT-NEXT: 
s_waitcnt vmcnt(0) 6433; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6434; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6435; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6436; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6437; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6438; 6439; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: 6440; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6441; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6442; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6443; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6444; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6445; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6446; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6447; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6448; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6449; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6450; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6451; GFX90A-TGSPLIT-NEXT: s_endpgm 6452 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6453entry: 6454 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6455 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic 6456 %val0 = extractvalue { i32, i1 } %val, 0 6457 store i32 %val0, i32 addrspace(1)* %out, align 4 6458 ret void 6459} 6460 6461define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( 6462; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 6463; GFX6: ; %bb.0: ; %entry 6464; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6465; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6466; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6467; GFX6-NEXT: s_mov_b32 s2, -1 6468; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6469; GFX6-NEXT: v_mov_b32_e32 v0, s4 6470; GFX6-NEXT: v_mov_b32_e32 v1, s5 6471; GFX6-NEXT: s_waitcnt vmcnt(0) 6472; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 
6473; GFX6-NEXT: s_waitcnt vmcnt(0) 6474; GFX6-NEXT: buffer_wbinvl1 6475; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 6476; GFX6-NEXT: s_endpgm 6477; 6478; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 6479; GFX7: ; %bb.0: ; %entry 6480; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6481; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6482; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6483; GFX7-NEXT: s_add_u32 s4, s0, 16 6484; GFX7-NEXT: s_addc_u32 s5, s1, 0 6485; GFX7-NEXT: v_mov_b32_e32 v0, s4 6486; GFX7-NEXT: v_mov_b32_e32 v2, s2 6487; GFX7-NEXT: v_mov_b32_e32 v1, s5 6488; GFX7-NEXT: v_mov_b32_e32 v3, s3 6489; GFX7-NEXT: s_waitcnt vmcnt(0) 6490; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6491; GFX7-NEXT: s_waitcnt vmcnt(0) 6492; GFX7-NEXT: buffer_wbinvl1_vol 6493; GFX7-NEXT: v_mov_b32_e32 v0, s0 6494; GFX7-NEXT: v_mov_b32_e32 v1, s1 6495; GFX7-NEXT: flat_store_dword v[0:1], v2 6496; GFX7-NEXT: s_endpgm 6497; 6498; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 6499; GFX10-WGP: ; %bb.0: ; %entry 6500; GFX10-WGP-NEXT: s_clause 0x1 6501; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6502; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6503; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6504; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6505; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6506; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6507; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6508; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6509; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6510; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6511; GFX10-WGP-NEXT: buffer_gl0_inv 6512; GFX10-WGP-NEXT: buffer_gl1_inv 6513; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6514; GFX10-WGP-NEXT: s_endpgm 6515; 6516; GFX10-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 6517; GFX10-CU: ; %bb.0: ; %entry 6518; GFX10-CU-NEXT: s_clause 0x1 6519; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6520; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], 
s[4:5], 0x0 6521; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6522; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6523; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6524; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6525; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6526; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6527; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6528; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6529; GFX10-CU-NEXT: buffer_gl0_inv 6530; GFX10-CU-NEXT: buffer_gl1_inv 6531; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6532; GFX10-CU-NEXT: s_endpgm 6533; 6534; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 6535; SKIP-CACHE-INV: ; %bb.0: ; %entry 6536; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6537; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6538; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6539; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6540; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6541; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6542; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6543; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6544; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6545; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6546; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6547; SKIP-CACHE-INV-NEXT: s_endpgm 6548; 6549; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 6550; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6551; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6552; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6553; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6554; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6555; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6556; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6557; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6558; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6559; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6560; 
GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6561; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6562; 6563; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: 6564; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6565; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6566; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6567; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6568; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6569; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6570; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6571; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6572; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6573; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6574; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6575; GFX90A-TGSPLIT-NEXT: s_endpgm 6576 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6577entry: 6578 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6579 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic 6580 %val0 = extractvalue { i32, i1 } %val, 0 6581 store i32 %val0, i32 addrspace(1)* %out, align 4 6582 ret void 6583} 6584 6585define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( 6586; GFX6-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: 6587; GFX6: ; %bb.0: ; %entry 6588; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6589; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6590; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6591; GFX6-NEXT: s_mov_b32 s2, -1 6592; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6593; GFX6-NEXT: v_mov_b32_e32 v0, s4 6594; GFX6-NEXT: v_mov_b32_e32 v1, s5 6595; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6596; GFX6-NEXT: s_waitcnt vmcnt(0) 6597; GFX6-NEXT: buffer_wbinvl1 6598; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 6599; GFX6-NEXT: s_endpgm 6600; 6601; GFX7-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: 6602; GFX7: ; %bb.0: ; %entry 
6603; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6604; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6605; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6606; GFX7-NEXT: s_add_u32 s4, s0, 16 6607; GFX7-NEXT: s_addc_u32 s5, s1, 0 6608; GFX7-NEXT: v_mov_b32_e32 v0, s4 6609; GFX7-NEXT: v_mov_b32_e32 v2, s2 6610; GFX7-NEXT: v_mov_b32_e32 v1, s5 6611; GFX7-NEXT: v_mov_b32_e32 v3, s3 6612; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6613; GFX7-NEXT: s_waitcnt vmcnt(0) 6614; GFX7-NEXT: buffer_wbinvl1_vol 6615; GFX7-NEXT: v_mov_b32_e32 v0, s0 6616; GFX7-NEXT: v_mov_b32_e32 v1, s1 6617; GFX7-NEXT: flat_store_dword v[0:1], v2 6618; GFX7-NEXT: s_endpgm 6619; 6620; GFX10-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: 6621; GFX10-WGP: ; %bb.0: ; %entry 6622; GFX10-WGP-NEXT: s_clause 0x1 6623; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6624; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6625; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6626; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6627; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6628; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6629; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6630; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6631; GFX10-WGP-NEXT: buffer_gl0_inv 6632; GFX10-WGP-NEXT: buffer_gl1_inv 6633; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6634; GFX10-WGP-NEXT: s_endpgm 6635; 6636; GFX10-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: 6637; GFX10-CU: ; %bb.0: ; %entry 6638; GFX10-CU-NEXT: s_clause 0x1 6639; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6640; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6641; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6642; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6643; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6644; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6645; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6646; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6647; GFX10-CU-NEXT: buffer_gl0_inv 6648; GFX10-CU-NEXT: buffer_gl1_inv 6649; 
GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6650; GFX10-CU-NEXT: s_endpgm 6651; 6652; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: 6653; SKIP-CACHE-INV: ; %bb.0: ; %entry 6654; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6655; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6656; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6657; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6658; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6659; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6660; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6661; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6662; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6663; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6664; SKIP-CACHE-INV-NEXT: s_endpgm 6665; 6666; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: 6667; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6668; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6669; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6670; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6671; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6672; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6673; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6674; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6675; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6676; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6677; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6678; 6679; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: 6680; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6681; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6682; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6683; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6684; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6685; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6686; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, 
v[0:1], s[0:1] offset:16 glc 6687; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6688; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6689; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6690; GFX90A-TGSPLIT-NEXT: s_endpgm 6691 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6692entry: 6693 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6694 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire 6695 %val0 = extractvalue { i32, i1 } %val, 0 6696 store i32 %val0, i32 addrspace(1)* %out, align 4 6697 ret void 6698} 6699

; Returning cmpxchg on global memory (addrspace 1) with syncscope
; "agent-one-as", success ordering release and failure ordering acquire.
; The kernel compares the i32 located 4 elements past %out (byte offset 16 in
; the checked instructions) with %old, swaps in %in on a match, and stores the
; value the cmpxchg loaded back to %out.
; Every run expects an s_waitcnt vmcnt(0) on both sides of the atomic. An
; invalidate (buffer_wbinvl1, buffer_wbinvl1_vol, or buffer_gl0_inv +
; buffer_gl1_inv) follows it in every run except the one that passes
; -amdgcn-skip-cache-invalidations.
define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s4, s0, 16
; GFX7-NEXT: s_addc_u32 s5, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning cmpxchg on global memory (addrspace 1) with syncscope
; "agent-one-as", success ordering acq_rel and failure ordering acquire.
; The IR body matches the release/acquire test except for the ordering pair:
; compare the i32 at %out + 4 elements with %old, swap in %in, store the loaded
; value to %out.
; The checked instruction sequences are the same as for the release/acquire
; test: s_waitcnt vmcnt(0) before and after the atomic, with an invalidate
; afterwards unless -amdgcn-skip-cache-invalidations is passed.
define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s4, s0, 16
; GFX7-NEXT: s_addc_u32 s5, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning cmpxchg on global memory (addrspace 1) with syncscope
; "agent-one-as", success ordering seq_cst and failure ordering acquire.
; Same kernel shape as the tests above: cmpxchg on the i32 at %out + 4 elements
; (compare value %old, new value %in), then store the loaded value to %out.
; The checked instruction sequences are unchanged from the release/acquire and
; acq_rel/acquire tests.
define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s4, s0, 16
; GFX7-NEXT: s_addc_u32 s5, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning cmpxchg on global memory (addrspace 1) with syncscope
; "agent-one-as", success ordering seq_cst and failure ordering seq_cst.
; Same kernel shape as the tests above: cmpxchg on the i32 at %out + 4 elements
; (compare value %old, new value %in), then store the loaded value to %out.
; The checked instruction sequences are unchanged from the three preceding
; tests, including the absence of an invalidate in the run that passes
; -amdgcn-skip-cache-invalidations.
define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_u32 s4, s0, 16
; GFX7-NEXT: s_addc_u32 s5, s1, 0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}
