1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s 5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s 6; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s 7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s 8; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s 9 10define amdgpu_kernel void @global_system_unordered_load( 11; GFX6-LABEL: global_system_unordered_load: 12; GFX6: ; %bb.0: ; %entry 13; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 14; GFX6-NEXT: s_mov_b32 s3, 0x100f000 15; GFX6-NEXT: s_mov_b32 s2, -1 16; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17; GFX6-NEXT: s_mov_b32 s0, s4 18; GFX6-NEXT: s_mov_b32 s1, s5 19; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 20; GFX6-NEXT: s_mov_b32 s4, s6 21; GFX6-NEXT: s_mov_b32 s5, s7 22; GFX6-NEXT: s_mov_b32 s6, s2 23; GFX6-NEXT: s_mov_b32 s7, s3 24; GFX6-NEXT: s_waitcnt vmcnt(0) 25; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 26; GFX6-NEXT: s_endpgm 27; 28; GFX7-LABEL: global_system_unordered_load: 29; GFX7: ; %bb.0: ; %entry 30; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 31; GFX7-NEXT: s_waitcnt lgkmcnt(0) 32; GFX7-NEXT: v_mov_b32_e32 v0, s0 33; GFX7-NEXT: v_mov_b32_e32 v1, s1 34; GFX7-NEXT: flat_load_dword v0, v[0:1] 35; GFX7-NEXT: v_mov_b32_e32 v2, s2 36; GFX7-NEXT: v_mov_b32_e32 v3, s3 37; 
GFX7-NEXT: s_waitcnt vmcnt(0) 38; GFX7-NEXT: flat_store_dword v[2:3], v0 39; GFX7-NEXT: s_endpgm 40; 41; GFX10-WGP-LABEL: global_system_unordered_load: 42; GFX10-WGP: ; %bb.0: ; %entry 43; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 44; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 45; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 46; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] 47; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 48; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 49; GFX10-WGP-NEXT: s_endpgm 50; 51; GFX10-CU-LABEL: global_system_unordered_load: 52; GFX10-CU: ; %bb.0: ; %entry 53; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 54; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 55; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 56; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] 57; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 58; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 59; GFX10-CU-NEXT: s_endpgm 60; 61; SKIP-CACHE-INV-LABEL: global_system_unordered_load: 62; SKIP-CACHE-INV: ; %bb.0: ; %entry 63; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 64; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 65; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 66; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 67; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 68; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 69; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 70; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 71; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 72; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 73; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 74; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 75; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 76; SKIP-CACHE-INV-NEXT: s_endpgm 77; 78; GFX90A-NOTTGSPLIT-LABEL: global_system_unordered_load: 79; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 80; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 81; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 82; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 83; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] 84; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt 
vmcnt(0) 85; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 86; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 87; 88; GFX90A-TGSPLIT-LABEL: global_system_unordered_load: 89; GFX90A-TGSPLIT: ; %bb.0: ; %entry 90; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 91; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 92; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 93; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] 94; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 95; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 96; GFX90A-TGSPLIT-NEXT: s_endpgm 97 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 98entry: 99 %val = load atomic i32, i32 addrspace(1)* %in unordered, align 4 100 store i32 %val, i32 addrspace(1)* %out 101 ret void 102} 103 104define amdgpu_kernel void @global_system_monotonic_load( 105; GFX6-LABEL: global_system_monotonic_load: 106; GFX6: ; %bb.0: ; %entry 107; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 108; GFX6-NEXT: s_mov_b32 s3, 0x100f000 109; GFX6-NEXT: s_mov_b32 s2, -1 110; GFX6-NEXT: s_waitcnt lgkmcnt(0) 111; GFX6-NEXT: s_mov_b32 s0, s4 112; GFX6-NEXT: s_mov_b32 s1, s5 113; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 114; GFX6-NEXT: s_mov_b32 s4, s6 115; GFX6-NEXT: s_mov_b32 s5, s7 116; GFX6-NEXT: s_mov_b32 s6, s2 117; GFX6-NEXT: s_mov_b32 s7, s3 118; GFX6-NEXT: s_waitcnt vmcnt(0) 119; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 120; GFX6-NEXT: s_endpgm 121; 122; GFX7-LABEL: global_system_monotonic_load: 123; GFX7: ; %bb.0: ; %entry 124; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 125; GFX7-NEXT: s_waitcnt lgkmcnt(0) 126; GFX7-NEXT: v_mov_b32_e32 v0, s0 127; GFX7-NEXT: v_mov_b32_e32 v1, s1 128; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 129; GFX7-NEXT: v_mov_b32_e32 v2, s2 130; GFX7-NEXT: v_mov_b32_e32 v3, s3 131; GFX7-NEXT: s_waitcnt vmcnt(0) 132; GFX7-NEXT: flat_store_dword v[2:3], v0 133; GFX7-NEXT: s_endpgm 134; 135; GFX10-WGP-LABEL: global_system_monotonic_load: 136; GFX10-WGP: ; %bb.0: ; %entry 137; GFX10-WGP-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 138; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 139; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 140; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 141; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 142; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 143; GFX10-WGP-NEXT: s_endpgm 144; 145; GFX10-CU-LABEL: global_system_monotonic_load: 146; GFX10-CU: ; %bb.0: ; %entry 147; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 148; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 149; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 150; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 151; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 152; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 153; GFX10-CU-NEXT: s_endpgm 154; 155; SKIP-CACHE-INV-LABEL: global_system_monotonic_load: 156; SKIP-CACHE-INV: ; %bb.0: ; %entry 157; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 158; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 159; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 160; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 161; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 162; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 163; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 164; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 165; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 166; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 167; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 168; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 169; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 170; SKIP-CACHE-INV-NEXT: s_endpgm 171; 172; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_load: 173; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 174; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 175; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 176; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 177; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 178; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 179; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 180; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 181; 182; GFX90A-TGSPLIT-LABEL: 
global_system_monotonic_load: 183; GFX90A-TGSPLIT: ; %bb.0: ; %entry 184; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 185; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 186; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 187; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 188; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 189; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 190; GFX90A-TGSPLIT-NEXT: s_endpgm 191 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 192entry: 193 %val = load atomic i32, i32 addrspace(1)* %in monotonic, align 4 194 store i32 %val, i32 addrspace(1)* %out 195 ret void 196} 197 198define amdgpu_kernel void @global_system_acquire_load( 199; GFX6-LABEL: global_system_acquire_load: 200; GFX6: ; %bb.0: ; %entry 201; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 202; GFX6-NEXT: s_mov_b32 s3, 0x100f000 203; GFX6-NEXT: s_mov_b32 s2, -1 204; GFX6-NEXT: s_waitcnt lgkmcnt(0) 205; GFX6-NEXT: s_mov_b32 s0, s4 206; GFX6-NEXT: s_mov_b32 s1, s5 207; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 208; GFX6-NEXT: s_waitcnt vmcnt(0) 209; GFX6-NEXT: buffer_wbinvl1 210; GFX6-NEXT: s_mov_b32 s4, s6 211; GFX6-NEXT: s_mov_b32 s5, s7 212; GFX6-NEXT: s_mov_b32 s6, s2 213; GFX6-NEXT: s_mov_b32 s7, s3 214; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 215; GFX6-NEXT: s_endpgm 216; 217; GFX7-LABEL: global_system_acquire_load: 218; GFX7: ; %bb.0: ; %entry 219; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 220; GFX7-NEXT: s_waitcnt lgkmcnt(0) 221; GFX7-NEXT: v_mov_b32_e32 v0, s0 222; GFX7-NEXT: v_mov_b32_e32 v1, s1 223; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 224; GFX7-NEXT: s_waitcnt vmcnt(0) 225; GFX7-NEXT: buffer_wbinvl1_vol 226; GFX7-NEXT: v_mov_b32_e32 v2, s2 227; GFX7-NEXT: v_mov_b32_e32 v3, s3 228; GFX7-NEXT: flat_store_dword v[2:3], v0 229; GFX7-NEXT: s_endpgm 230; 231; GFX10-WGP-LABEL: global_system_acquire_load: 232; GFX10-WGP: ; %bb.0: ; %entry 233; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 234; GFX10-WGP-NEXT: v_mov_b32_e32 
v0, 0 235; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 236; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 237; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 238; GFX10-WGP-NEXT: buffer_gl0_inv 239; GFX10-WGP-NEXT: buffer_gl1_inv 240; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 241; GFX10-WGP-NEXT: s_endpgm 242; 243; GFX10-CU-LABEL: global_system_acquire_load: 244; GFX10-CU: ; %bb.0: ; %entry 245; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 246; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 247; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 248; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 249; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 250; GFX10-CU-NEXT: buffer_gl0_inv 251; GFX10-CU-NEXT: buffer_gl1_inv 252; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 253; GFX10-CU-NEXT: s_endpgm 254; 255; SKIP-CACHE-INV-LABEL: global_system_acquire_load: 256; SKIP-CACHE-INV: ; %bb.0: ; %entry 257; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 258; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 259; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 260; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 261; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 262; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 263; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 264; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 265; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 266; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 267; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 268; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 269; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 270; SKIP-CACHE-INV-NEXT: s_endpgm 271; 272; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_load: 273; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 274; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 275; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 276; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 277; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 278; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 279; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 280; 
GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 281; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 282; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 283; 284; GFX90A-TGSPLIT-LABEL: global_system_acquire_load: 285; GFX90A-TGSPLIT: ; %bb.0: ; %entry 286; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 287; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 288; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 289; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 290; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 291; GFX90A-TGSPLIT-NEXT: buffer_invl2 292; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 293; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 294; GFX90A-TGSPLIT-NEXT: s_endpgm 295 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 296entry: 297 %val = load atomic i32, i32 addrspace(1)* %in acquire, align 4 298 store i32 %val, i32 addrspace(1)* %out 299 ret void 300} 301 302define amdgpu_kernel void @global_system_seq_cst_load( 303; GFX6-LABEL: global_system_seq_cst_load: 304; GFX6: ; %bb.0: ; %entry 305; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 306; GFX6-NEXT: s_mov_b32 s3, 0x100f000 307; GFX6-NEXT: s_mov_b32 s2, -1 308; GFX6-NEXT: s_waitcnt lgkmcnt(0) 309; GFX6-NEXT: s_mov_b32 s0, s4 310; GFX6-NEXT: s_mov_b32 s1, s5 311; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 312; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 313; GFX6-NEXT: s_waitcnt vmcnt(0) 314; GFX6-NEXT: buffer_wbinvl1 315; GFX6-NEXT: s_mov_b32 s4, s6 316; GFX6-NEXT: s_mov_b32 s5, s7 317; GFX6-NEXT: s_mov_b32 s6, s2 318; GFX6-NEXT: s_mov_b32 s7, s3 319; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 320; GFX6-NEXT: s_endpgm 321; 322; GFX7-LABEL: global_system_seq_cst_load: 323; GFX7: ; %bb.0: ; %entry 324; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 325; GFX7-NEXT: s_waitcnt lgkmcnt(0) 326; GFX7-NEXT: v_mov_b32_e32 v0, s0 327; GFX7-NEXT: v_mov_b32_e32 v1, s1 328; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 329; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 330; GFX7-NEXT: s_waitcnt vmcnt(0) 331; 
GFX7-NEXT: buffer_wbinvl1_vol 332; GFX7-NEXT: v_mov_b32_e32 v2, s2 333; GFX7-NEXT: v_mov_b32_e32 v3, s3 334; GFX7-NEXT: flat_store_dword v[2:3], v0 335; GFX7-NEXT: s_endpgm 336; 337; GFX10-WGP-LABEL: global_system_seq_cst_load: 338; GFX10-WGP: ; %bb.0: ; %entry 339; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 340; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 341; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 342; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 343; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 344; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 345; GFX10-WGP-NEXT: buffer_gl0_inv 346; GFX10-WGP-NEXT: buffer_gl1_inv 347; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 348; GFX10-WGP-NEXT: s_endpgm 349; 350; GFX10-CU-LABEL: global_system_seq_cst_load: 351; GFX10-CU: ; %bb.0: ; %entry 352; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 353; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 354; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 355; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 356; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 357; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 358; GFX10-CU-NEXT: buffer_gl0_inv 359; GFX10-CU-NEXT: buffer_gl1_inv 360; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 361; GFX10-CU-NEXT: s_endpgm 362; 363; SKIP-CACHE-INV-LABEL: global_system_seq_cst_load: 364; SKIP-CACHE-INV: ; %bb.0: ; %entry 365; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 366; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 367; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 368; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 369; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 370; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 371; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 372; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 373; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 374; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 375; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 376; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 377; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 378; SKIP-CACHE-INV-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 379; SKIP-CACHE-INV-NEXT: s_endpgm 380; 381; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_load: 382; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 383; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 384; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 385; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 386; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 387; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 388; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 389; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 390; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 391; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 392; 393; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_load: 394; GFX90A-TGSPLIT: ; %bb.0: ; %entry 395; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 396; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 397; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 398; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 399; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 400; GFX90A-TGSPLIT-NEXT: buffer_invl2 401; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 402; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 403; GFX90A-TGSPLIT-NEXT: s_endpgm 404 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 405entry: 406 %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4 407 store i32 %val, i32 addrspace(1)* %out 408 ret void 409} 410 411define amdgpu_kernel void @global_system_unordered_store( 412; GFX6-LABEL: global_system_unordered_store: 413; GFX6: ; %bb.0: ; %entry 414; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 415; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 416; GFX6-NEXT: s_mov_b32 s3, 0x100f000 417; GFX6-NEXT: s_mov_b32 s2, -1 418; GFX6-NEXT: s_waitcnt lgkmcnt(0) 419; GFX6-NEXT: v_mov_b32_e32 v0, s6 420; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 421; GFX6-NEXT: s_endpgm 422; 423; GFX7-LABEL: global_system_unordered_store: 424; GFX7: ; %bb.0: ; %entry 425; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 426; 
GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 427; GFX7-NEXT: s_waitcnt lgkmcnt(0) 428; GFX7-NEXT: v_mov_b32_e32 v2, s2 429; GFX7-NEXT: v_mov_b32_e32 v0, s0 430; GFX7-NEXT: v_mov_b32_e32 v1, s1 431; GFX7-NEXT: flat_store_dword v[0:1], v2 432; GFX7-NEXT: s_endpgm 433; 434; GFX10-WGP-LABEL: global_system_unordered_store: 435; GFX10-WGP: ; %bb.0: ; %entry 436; GFX10-WGP-NEXT: s_clause 0x1 437; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 438; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 439; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 440; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 441; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 442; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 443; GFX10-WGP-NEXT: s_endpgm 444; 445; GFX10-CU-LABEL: global_system_unordered_store: 446; GFX10-CU: ; %bb.0: ; %entry 447; GFX10-CU-NEXT: s_clause 0x1 448; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 449; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 450; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 451; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 452; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 453; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 454; GFX10-CU-NEXT: s_endpgm 455; 456; SKIP-CACHE-INV-LABEL: global_system_unordered_store: 457; SKIP-CACHE-INV: ; %bb.0: ; %entry 458; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 459; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 460; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 461; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 462; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 463; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 464; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 465; SKIP-CACHE-INV-NEXT: s_endpgm 466; 467; GFX90A-NOTTGSPLIT-LABEL: global_system_unordered_store: 468; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 469; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 470; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 471; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 472; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 473; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v1, s2 474; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 475; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 476; 477; GFX90A-TGSPLIT-LABEL: global_system_unordered_store: 478; GFX90A-TGSPLIT: ; %bb.0: ; %entry 479; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 480; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 481; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 482; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 483; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 484; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 485; GFX90A-TGSPLIT-NEXT: s_endpgm 486 i32 %in, i32 addrspace(1)* %out) { 487entry: 488 store atomic i32 %in, i32 addrspace(1)* %out unordered, align 4 489 ret void 490} 491 492define amdgpu_kernel void @global_system_monotonic_store( 493; GFX6-LABEL: global_system_monotonic_store: 494; GFX6: ; %bb.0: ; %entry 495; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 496; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 497; GFX6-NEXT: s_mov_b32 s3, 0x100f000 498; GFX6-NEXT: s_mov_b32 s2, -1 499; GFX6-NEXT: s_waitcnt lgkmcnt(0) 500; GFX6-NEXT: v_mov_b32_e32 v0, s6 501; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 502; GFX6-NEXT: s_endpgm 503; 504; GFX7-LABEL: global_system_monotonic_store: 505; GFX7: ; %bb.0: ; %entry 506; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 507; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 508; GFX7-NEXT: s_waitcnt lgkmcnt(0) 509; GFX7-NEXT: v_mov_b32_e32 v2, s2 510; GFX7-NEXT: v_mov_b32_e32 v0, s0 511; GFX7-NEXT: v_mov_b32_e32 v1, s1 512; GFX7-NEXT: flat_store_dword v[0:1], v2 513; GFX7-NEXT: s_endpgm 514; 515; GFX10-WGP-LABEL: global_system_monotonic_store: 516; GFX10-WGP: ; %bb.0: ; %entry 517; GFX10-WGP-NEXT: s_clause 0x1 518; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 519; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 520; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 521; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 522; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 523; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 524; 
GFX10-WGP-NEXT: s_endpgm 525; 526; GFX10-CU-LABEL: global_system_monotonic_store: 527; GFX10-CU: ; %bb.0: ; %entry 528; GFX10-CU-NEXT: s_clause 0x1 529; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 530; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 531; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 532; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 533; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 534; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 535; GFX10-CU-NEXT: s_endpgm 536; 537; SKIP-CACHE-INV-LABEL: global_system_monotonic_store: 538; SKIP-CACHE-INV: ; %bb.0: ; %entry 539; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 540; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 541; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 542; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 543; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 544; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 545; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 546; SKIP-CACHE-INV-NEXT: s_endpgm 547; 548; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_store: 549; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 550; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 551; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 552; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 553; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 554; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 555; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 556; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 557; 558; GFX90A-TGSPLIT-LABEL: global_system_monotonic_store: 559; GFX90A-TGSPLIT: ; %bb.0: ; %entry 560; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 561; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 562; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 563; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 564; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 565; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 566; GFX90A-TGSPLIT-NEXT: s_endpgm 567 i32 %in, i32 addrspace(1)* %out) { 568entry: 569 store atomic i32 %in, i32 addrspace(1)* %out monotonic, 
align 4 570 ret void 571} 572 573define amdgpu_kernel void @global_system_release_store( 574; GFX6-LABEL: global_system_release_store: 575; GFX6: ; %bb.0: ; %entry 576; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 577; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 578; GFX6-NEXT: s_mov_b32 s3, 0x100f000 579; GFX6-NEXT: s_mov_b32 s2, -1 580; GFX6-NEXT: s_waitcnt lgkmcnt(0) 581; GFX6-NEXT: v_mov_b32_e32 v0, s6 582; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 583; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 584; GFX6-NEXT: s_endpgm 585; 586; GFX7-LABEL: global_system_release_store: 587; GFX7: ; %bb.0: ; %entry 588; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 589; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 590; GFX7-NEXT: s_waitcnt lgkmcnt(0) 591; GFX7-NEXT: v_mov_b32_e32 v2, s2 592; GFX7-NEXT: v_mov_b32_e32 v0, s0 593; GFX7-NEXT: v_mov_b32_e32 v1, s1 594; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 595; GFX7-NEXT: flat_store_dword v[0:1], v2 596; GFX7-NEXT: s_endpgm 597; 598; GFX10-WGP-LABEL: global_system_release_store: 599; GFX10-WGP: ; %bb.0: ; %entry 600; GFX10-WGP-NEXT: s_clause 0x1 601; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 602; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 603; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 604; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 605; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 606; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 607; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 608; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 609; GFX10-WGP-NEXT: s_endpgm 610; 611; GFX10-CU-LABEL: global_system_release_store: 612; GFX10-CU: ; %bb.0: ; %entry 613; GFX10-CU-NEXT: s_clause 0x1 614; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 615; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 616; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 617; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 618; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 619; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 620; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 621; GFX10-CU-NEXT: global_store_dword v0, v1, 
s[0:1] 622; GFX10-CU-NEXT: s_endpgm 623; 624; SKIP-CACHE-INV-LABEL: global_system_release_store: 625; SKIP-CACHE-INV: ; %bb.0: ; %entry 626; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 627; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 628; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 629; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 630; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 631; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 632; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 633; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 634; SKIP-CACHE-INV-NEXT: s_endpgm 635; 636; GFX90A-NOTTGSPLIT-LABEL: global_system_release_store: 637; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 638; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 639; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 640; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 641; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 642; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 643; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 644; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 645; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 646; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 647; 648; GFX90A-TGSPLIT-LABEL: global_system_release_store: 649; GFX90A-TGSPLIT: ; %bb.0: ; %entry 650; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 651; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 652; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 653; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 654; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 655; GFX90A-TGSPLIT-NEXT: buffer_wbl2 656; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 657; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 658; GFX90A-TGSPLIT-NEXT: s_endpgm 659 i32 %in, i32 addrspace(1)* %out) { 660entry: 661 store atomic i32 %in, i32 addrspace(1)* %out release, align 4 662 ret void 663} 664 665define amdgpu_kernel void @global_system_seq_cst_store( 666; GFX6-LABEL: global_system_seq_cst_store: 667; GFX6: ; %bb.0: ; %entry 668; 
GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 669; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 670; GFX6-NEXT: s_mov_b32 s3, 0x100f000 671; GFX6-NEXT: s_mov_b32 s2, -1 672; GFX6-NEXT: s_waitcnt lgkmcnt(0) 673; GFX6-NEXT: v_mov_b32_e32 v0, s6 674; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 675; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 676; GFX6-NEXT: s_endpgm 677; 678; GFX7-LABEL: global_system_seq_cst_store: 679; GFX7: ; %bb.0: ; %entry 680; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 681; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 682; GFX7-NEXT: s_waitcnt lgkmcnt(0) 683; GFX7-NEXT: v_mov_b32_e32 v2, s2 684; GFX7-NEXT: v_mov_b32_e32 v0, s0 685; GFX7-NEXT: v_mov_b32_e32 v1, s1 686; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 687; GFX7-NEXT: flat_store_dword v[0:1], v2 688; GFX7-NEXT: s_endpgm 689; 690; GFX10-WGP-LABEL: global_system_seq_cst_store: 691; GFX10-WGP: ; %bb.0: ; %entry 692; GFX10-WGP-NEXT: s_clause 0x1 693; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 694; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 695; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 696; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 697; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 698; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 699; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 700; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 701; GFX10-WGP-NEXT: s_endpgm 702; 703; GFX10-CU-LABEL: global_system_seq_cst_store: 704; GFX10-CU: ; %bb.0: ; %entry 705; GFX10-CU-NEXT: s_clause 0x1 706; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 707; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 708; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 709; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 710; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 711; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 712; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 713; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 714; GFX10-CU-NEXT: s_endpgm 715; 716; SKIP-CACHE-INV-LABEL: global_system_seq_cst_store: 717; SKIP-CACHE-INV: ; %bb.0: ; %entry 718; SKIP-CACHE-INV-NEXT: 
s_load_dword s4, s[0:1], 0x9 719; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 720; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 721; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 722; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 723; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 724; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 725; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 726; SKIP-CACHE-INV-NEXT: s_endpgm 727; 728; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_store: 729; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 730; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 731; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 732; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 733; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 734; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 735; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 736; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 737; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 738; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 739; 740; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_store: 741; GFX90A-TGSPLIT: ; %bb.0: ; %entry 742; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 743; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 744; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 745; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 746; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 747; GFX90A-TGSPLIT-NEXT: buffer_wbl2 748; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 749; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 750; GFX90A-TGSPLIT-NEXT: s_endpgm 751 i32 %in, i32 addrspace(1)* %out) { 752entry: 753 store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4 754 ret void 755} 756 757define amdgpu_kernel void @global_system_monotonic_atomicrmw( 758; GFX6-LABEL: global_system_monotonic_atomicrmw: 759; GFX6: ; %bb.0: ; %entry 760; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 761; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 762; GFX6-NEXT: s_mov_b32 s3, 0x100f000 763; GFX6-NEXT: s_mov_b32 
s2, -1 764; GFX6-NEXT: s_waitcnt lgkmcnt(0) 765; GFX6-NEXT: v_mov_b32_e32 v0, s4 766; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 767; GFX6-NEXT: s_endpgm 768; 769; GFX7-LABEL: global_system_monotonic_atomicrmw: 770; GFX7: ; %bb.0: ; %entry 771; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 772; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 773; GFX7-NEXT: s_waitcnt lgkmcnt(0) 774; GFX7-NEXT: v_mov_b32_e32 v0, s0 775; GFX7-NEXT: v_mov_b32_e32 v1, s1 776; GFX7-NEXT: v_mov_b32_e32 v2, s2 777; GFX7-NEXT: flat_atomic_swap v[0:1], v2 778; GFX7-NEXT: s_endpgm 779; 780; GFX10-WGP-LABEL: global_system_monotonic_atomicrmw: 781; GFX10-WGP: ; %bb.0: ; %entry 782; GFX10-WGP-NEXT: s_clause 0x1 783; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 784; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 785; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 786; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 787; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 788; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 789; GFX10-WGP-NEXT: s_endpgm 790; 791; GFX10-CU-LABEL: global_system_monotonic_atomicrmw: 792; GFX10-CU: ; %bb.0: ; %entry 793; GFX10-CU-NEXT: s_clause 0x1 794; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 795; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 796; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 797; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 798; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 799; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 800; GFX10-CU-NEXT: s_endpgm 801; 802; SKIP-CACHE-INV-LABEL: global_system_monotonic_atomicrmw: 803; SKIP-CACHE-INV: ; %bb.0: ; %entry 804; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 805; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 806; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 807; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 808; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 809; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 810; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 811; SKIP-CACHE-INV-NEXT: s_endpgm 812; 813; GFX90A-NOTTGSPLIT-LABEL: 
global_system_monotonic_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_monotonic_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in monotonic
  ret void
}

; Acquire `atomicrmw volatile xchg` on a global (addrspace(1)) pointer, written
; without a syncscope argument; the swapped-out value %val is never used.
; Expected after the swap: a wait (`s_waitcnt vmcnt(0)`, or
; `s_waitcnt_vscnt null, 0x0` under the GFX10 prefixes) and then the
; cache-invalidate instructions: buffer_wbinvl1 (GFX6), buffer_wbinvl1_vol
; (GFX7), buffer_gl0_inv + buffer_gl1_inv (GFX10-WGP/CU), buffer_invl2 +
; buffer_wbinvl1_vol (both GFX90A prefixes).
; SKIP-CACHE-INV (run with -amdgcn-skip-cache-invalidations) keeps the wait but
; expects no invalidate. Nothing extra is expected ahead of the swap.
define amdgpu_kernel void @global_system_acquire_atomicrmw(
; GFX6-LABEL: global_system_acquire_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_acquire_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_acquire_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_acquire_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_acquire_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acquire
  ret void
}

; Release `atomicrmw volatile xchg` on a global (addrspace(1)) pointer, written
; without a syncscope argument; the swapped-out value %val is never used.
; Expected immediately before the swap: `s_waitcnt vmcnt(0) lgkmcnt(0)`.
; The GFX10 prefixes additionally expect `s_waitcnt_vscnt null, 0x0` after that
; wait, and both GFX90A prefixes expect `buffer_wbl2` ahead of it.
; Nothing is expected between the swap and s_endpgm, and the SKIP-CACHE-INV
; sequence has the same shape as the GFX6 one.
define amdgpu_kernel void @global_system_release_atomicrmw(
; GFX6-LABEL: global_system_release_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_release_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_release_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_release_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_release_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_release_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
;
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_release_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in release
  ret void
}

; Acq_rel `atomicrmw volatile xchg` on a global (addrspace(1)) pointer, written
; without a syncscope argument; the swapped-out value %val is never used.
; Expected before the swap: `s_waitcnt vmcnt(0) lgkmcnt(0)` (followed by
; `s_waitcnt_vscnt null, 0x0` under the GFX10 prefixes, preceded by
; `buffer_wbl2` under the GFX90A prefixes).
; Expected after the swap: a wait (`s_waitcnt vmcnt(0)`, or
; `s_waitcnt_vscnt null, 0x0` under the GFX10 prefixes) and then the
; cache-invalidate instructions: buffer_wbinvl1 (GFX6), buffer_wbinvl1_vol
; (GFX7), buffer_gl0_inv + buffer_gl1_inv (GFX10), buffer_invl2 +
; buffer_wbinvl1_vol (GFX90A). SKIP-CACHE-INV expects both waits but no
; invalidate.
define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
; GFX6-LABEL: global_system_acq_rel_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_acq_rel_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_acq_rel_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_acq_rel_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt
vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acq_rel
  ret void
}

; Seq_cst `atomicrmw volatile xchg` on a global (addrspace(1)) pointer, written
; without a syncscope argument; the swapped-out value %val is never used.
; Expected before the swap: `s_waitcnt vmcnt(0) lgkmcnt(0)` (followed by
; `s_waitcnt_vscnt null, 0x0` under the GFX10 prefixes, preceded by
; `buffer_wbl2` under the GFX90A prefixes).
; Expected after the swap: a wait (`s_waitcnt vmcnt(0)`, or
; `s_waitcnt_vscnt null, 0x0` under the GFX10 prefixes) and then the
; cache-invalidate instructions: buffer_wbinvl1 (GFX6), buffer_wbinvl1_vol
; (GFX7), buffer_gl0_inv + buffer_gl1_inv (GFX10), buffer_invl2 +
; buffer_wbinvl1_vol (GFX90A). SKIP-CACHE-INV expects both waits but no
; invalidate.
define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
; GFX6-LABEL: global_system_seq_cst_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_seq_cst_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_seq_cst_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_seq_cst_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT:
s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; Acquire `atomicrmw volatile xchg` on a global (addrspace(1)) pointer, written
; without a syncscope argument, whose result is used: %val is stored back to
; %out with a plain (non-atomic) store.
; The swap is expected with the `glc` modifier, and the wait after it is
; `s_waitcnt vmcnt(0)` under every prefix, including the GFX10 ones.
; The cache-invalidate instructions (buffer_wbinvl1 on GFX6, buffer_wbinvl1_vol
; on GFX7, buffer_gl0_inv + buffer_gl1_inv on GFX10, buffer_invl2 +
; buffer_wbinvl1_vol on GFX90A) are expected between that wait and the store of
; the result. SKIP-CACHE-INV expects the wait and the store with no invalidate.
define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
; GFX6-LABEL: global_system_acquire_ret_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_acquire_ret_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_acquire_ret_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_acquire_ret_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_acquire_ret_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_ret_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0,
v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_acquire_ret_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acquire
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Acq_rel `atomicrmw volatile xchg` on a global (addrspace(1)) pointer, written
; without a syncscope argument, whose result is used: %val is stored back to
; %out with a plain (non-atomic) store.
; Expected before the swap: `s_waitcnt vmcnt(0) lgkmcnt(0)` (followed by
; `s_waitcnt_vscnt null, 0x0` under the GFX10 prefixes, preceded by
; `buffer_wbl2` under the GFX90A prefixes).
; The swap is expected with the `glc` modifier; after it every prefix expects
; `s_waitcnt vmcnt(0)`, then the cache-invalidate instructions (buffer_wbinvl1,
; buffer_wbinvl1_vol, buffer_gl0_inv + buffer_gl1_inv, or buffer_invl2 +
; buffer_wbinvl1_vol depending on the prefix), then the store of the result.
; SKIP-CACHE-INV expects the waits and the store with no invalidate.
define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
; GFX6-LABEL: global_system_acq_rel_ret_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_acq_rel_ret_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_acq_rel_ret_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_acq_rel_ret_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_acq_rel_ret_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90A-TGSPLIT-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32
addrspace(1)* %out, i32 %in acq_rel
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Seq_cst `atomicrmw volatile xchg` on a global (addrspace(1)) pointer, written
; without a syncscope argument, whose result is used: %val is stored back to
; %out with a plain (non-atomic) store.
; Expected before the swap: `s_waitcnt vmcnt(0) lgkmcnt(0)` (followed by
; `s_waitcnt_vscnt null, 0x0` under the GFX10 prefixes, preceded by
; `buffer_wbl2` under the GFX90A prefixes).
; The swap is expected with the `glc` modifier; after it every prefix expects
; `s_waitcnt vmcnt(0)`, then the cache-invalidate instructions (buffer_wbinvl1,
; buffer_wbinvl1_vol, buffer_gl0_inv + buffer_gl1_inv, or buffer_invl2 +
; buffer_wbinvl1_vol depending on the prefix), then the store of the result.
; SKIP-CACHE-INV expects the waits and the store with no invalidate.
define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
; GFX6-LABEL: global_system_seq_cst_ret_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2
; GFX6-NEXT: s_mov_b32 s3, 0x100f000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_seq_cst_ret_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_seq_cst_ret_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_seq_cst_ret_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_seq_cst_ret_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
lgkmcnt(0) 1557; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1558; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1559; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 1560; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1561; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1562; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1563; 1564; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: 1565; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1566; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1567; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1568; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1569; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1570; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1571; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1572; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1573; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1574; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1575; GFX90A-TGSPLIT-NEXT: buffer_invl2 1576; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1577; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 1578; GFX90A-TGSPLIT-NEXT: s_endpgm 1579 i32 addrspace(1)* %out, i32 %in) { 1580entry: 1581 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst 1582 store i32 %val, i32 addrspace(1)* %out, align 4 1583 ret void 1584} 1585 1586define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( 1587; GFX6-LABEL: global_system_monotonic_monotonic_cmpxchg: 1588; GFX6: ; %bb.0: ; %entry 1589; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1590; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1591; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1592; GFX6-NEXT: s_mov_b32 s2, -1 1593; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1594; GFX6-NEXT: v_mov_b32_e32 v0, s4 1595; GFX6-NEXT: v_mov_b32_e32 v1, s5 1596; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 1597; GFX6-NEXT: s_endpgm 1598; 1599; GFX7-LABEL: global_system_monotonic_monotonic_cmpxchg: 1600; GFX7: ; %bb.0: ; %entry 1601; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x0 1602; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1603; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1604; GFX7-NEXT: s_add_u32 s0, s0, 16 1605; GFX7-NEXT: s_addc_u32 s1, s1, 0 1606; GFX7-NEXT: v_mov_b32_e32 v0, s0 1607; GFX7-NEXT: v_mov_b32_e32 v2, s2 1608; GFX7-NEXT: v_mov_b32_e32 v1, s1 1609; GFX7-NEXT: v_mov_b32_e32 v3, s3 1610; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1611; GFX7-NEXT: s_endpgm 1612; 1613; GFX10-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg: 1614; GFX10-WGP: ; %bb.0: ; %entry 1615; GFX10-WGP-NEXT: s_clause 0x1 1616; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1617; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1618; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1619; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1620; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1621; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1622; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1623; GFX10-WGP-NEXT: s_endpgm 1624; 1625; GFX10-CU-LABEL: global_system_monotonic_monotonic_cmpxchg: 1626; GFX10-CU: ; %bb.0: ; %entry 1627; GFX10-CU-NEXT: s_clause 0x1 1628; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1629; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1630; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1631; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1632; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1633; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1634; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1635; GFX10-CU-NEXT: s_endpgm 1636; 1637; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_cmpxchg: 1638; SKIP-CACHE-INV: ; %bb.0: ; %entry 1639; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1640; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1641; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1642; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1643; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1644; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1645; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1646; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 
offset:16 1647; SKIP-CACHE-INV-NEXT: s_endpgm 1648; 1649; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: 1650; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1651; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1652; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1653; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1654; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1655; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1656; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1657; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1658; 1659; GFX90A-TGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: 1660; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1661; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1662; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1663; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1664; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1665; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1666; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1667; GFX90A-TGSPLIT-NEXT: s_endpgm 1668 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1669entry: 1670 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1671 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in monotonic monotonic 1672 ret void 1673} 1674 1675define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( 1676; GFX6-LABEL: global_system_acquire_monotonic_cmpxchg: 1677; GFX6: ; %bb.0: ; %entry 1678; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1679; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1680; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1681; GFX6-NEXT: s_mov_b32 s2, -1 1682; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1683; GFX6-NEXT: v_mov_b32_e32 v0, s4 1684; GFX6-NEXT: v_mov_b32_e32 v1, s5 1685; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 1686; GFX6-NEXT: s_waitcnt vmcnt(0) 1687; GFX6-NEXT: buffer_wbinvl1 1688; GFX6-NEXT: s_endpgm 1689; 1690; 
GFX7-LABEL: global_system_acquire_monotonic_cmpxchg: 1691; GFX7: ; %bb.0: ; %entry 1692; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1693; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1694; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1695; GFX7-NEXT: s_add_u32 s0, s0, 16 1696; GFX7-NEXT: s_addc_u32 s1, s1, 0 1697; GFX7-NEXT: v_mov_b32_e32 v0, s0 1698; GFX7-NEXT: v_mov_b32_e32 v2, s2 1699; GFX7-NEXT: v_mov_b32_e32 v1, s1 1700; GFX7-NEXT: v_mov_b32_e32 v3, s3 1701; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1702; GFX7-NEXT: s_waitcnt vmcnt(0) 1703; GFX7-NEXT: buffer_wbinvl1_vol 1704; GFX7-NEXT: s_endpgm 1705; 1706; GFX10-WGP-LABEL: global_system_acquire_monotonic_cmpxchg: 1707; GFX10-WGP: ; %bb.0: ; %entry 1708; GFX10-WGP-NEXT: s_clause 0x1 1709; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1710; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1711; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1712; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1713; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1714; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1715; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1716; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1717; GFX10-WGP-NEXT: buffer_gl0_inv 1718; GFX10-WGP-NEXT: buffer_gl1_inv 1719; GFX10-WGP-NEXT: s_endpgm 1720; 1721; GFX10-CU-LABEL: global_system_acquire_monotonic_cmpxchg: 1722; GFX10-CU: ; %bb.0: ; %entry 1723; GFX10-CU-NEXT: s_clause 0x1 1724; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1725; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1726; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1727; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1728; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1729; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1730; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1731; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1732; GFX10-CU-NEXT: buffer_gl0_inv 1733; GFX10-CU-NEXT: buffer_gl1_inv 1734; GFX10-CU-NEXT: s_endpgm 1735; 1736; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg: 1737; SKIP-CACHE-INV: ; %bb.0: ; 
%entry 1738; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1739; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1740; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1741; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1742; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1743; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1744; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1745; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1746; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1747; SKIP-CACHE-INV-NEXT: s_endpgm 1748; 1749; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: 1750; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1751; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1752; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1753; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1754; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1755; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1756; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1757; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1758; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 1759; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1760; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1761; 1762; GFX90A-TGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: 1763; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1764; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1765; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1766; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1767; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1768; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1769; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1770; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1771; GFX90A-TGSPLIT-NEXT: buffer_invl2 1772; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1773; GFX90A-TGSPLIT-NEXT: s_endpgm 1774 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1775entry: 1776 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 
4 1777 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire monotonic 1778 ret void 1779} 1780 1781define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( 1782; GFX6-LABEL: global_system_release_monotonic_cmpxchg: 1783; GFX6: ; %bb.0: ; %entry 1784; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1785; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1786; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1787; GFX6-NEXT: s_mov_b32 s2, -1 1788; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1789; GFX6-NEXT: v_mov_b32_e32 v0, s4 1790; GFX6-NEXT: v_mov_b32_e32 v1, s5 1791; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1792; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 1793; GFX6-NEXT: s_endpgm 1794; 1795; GFX7-LABEL: global_system_release_monotonic_cmpxchg: 1796; GFX7: ; %bb.0: ; %entry 1797; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1798; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1799; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1800; GFX7-NEXT: s_add_u32 s0, s0, 16 1801; GFX7-NEXT: s_addc_u32 s1, s1, 0 1802; GFX7-NEXT: v_mov_b32_e32 v0, s0 1803; GFX7-NEXT: v_mov_b32_e32 v2, s2 1804; GFX7-NEXT: v_mov_b32_e32 v1, s1 1805; GFX7-NEXT: v_mov_b32_e32 v3, s3 1806; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1807; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1808; GFX7-NEXT: s_endpgm 1809; 1810; GFX10-WGP-LABEL: global_system_release_monotonic_cmpxchg: 1811; GFX10-WGP: ; %bb.0: ; %entry 1812; GFX10-WGP-NEXT: s_clause 0x1 1813; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1814; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1815; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1816; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1817; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1818; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1819; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1820; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1821; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1822; GFX10-WGP-NEXT: s_endpgm 1823; 1824; GFX10-CU-LABEL: 
global_system_release_monotonic_cmpxchg: 1825; GFX10-CU: ; %bb.0: ; %entry 1826; GFX10-CU-NEXT: s_clause 0x1 1827; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1828; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1829; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1830; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1831; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1832; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1833; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1834; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1835; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1836; GFX10-CU-NEXT: s_endpgm 1837; 1838; SKIP-CACHE-INV-LABEL: global_system_release_monotonic_cmpxchg: 1839; SKIP-CACHE-INV: ; %bb.0: ; %entry 1840; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1841; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1842; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1843; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1844; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1845; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1846; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1847; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1848; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1849; SKIP-CACHE-INV-NEXT: s_endpgm 1850; 1851; GFX90A-NOTTGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: 1852; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1853; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1854; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1855; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1856; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1857; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1858; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1859; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1860; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1861; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1862; 1863; GFX90A-TGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: 1864; GFX90A-TGSPLIT: ; %bb.0: ; 
%entry 1865; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1866; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1867; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1868; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1869; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1870; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1871; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1872; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1873; GFX90A-TGSPLIT-NEXT: s_endpgm 1874 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1875entry: 1876 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1877 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release monotonic 1878 ret void 1879} 1880 1881define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( 1882; GFX6-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1883; GFX6: ; %bb.0: ; %entry 1884; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1885; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1886; GFX6-NEXT: s_mov_b32 s3, 0x100f000 1887; GFX6-NEXT: s_mov_b32 s2, -1 1888; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1889; GFX6-NEXT: v_mov_b32_e32 v0, s4 1890; GFX6-NEXT: v_mov_b32_e32 v1, s5 1891; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1892; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 1893; GFX6-NEXT: s_waitcnt vmcnt(0) 1894; GFX6-NEXT: buffer_wbinvl1 1895; GFX6-NEXT: s_endpgm 1896; 1897; GFX7-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1898; GFX7: ; %bb.0: ; %entry 1899; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1900; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1901; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1902; GFX7-NEXT: s_add_u32 s0, s0, 16 1903; GFX7-NEXT: s_addc_u32 s1, s1, 0 1904; GFX7-NEXT: v_mov_b32_e32 v0, s0 1905; GFX7-NEXT: v_mov_b32_e32 v2, s2 1906; GFX7-NEXT: v_mov_b32_e32 v1, s1 1907; GFX7-NEXT: v_mov_b32_e32 v3, s3 1908; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1909; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1910; GFX7-NEXT: 
s_waitcnt vmcnt(0) 1911; GFX7-NEXT: buffer_wbinvl1_vol 1912; GFX7-NEXT: s_endpgm 1913; 1914; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1915; GFX10-WGP: ; %bb.0: ; %entry 1916; GFX10-WGP-NEXT: s_clause 0x1 1917; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1918; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1919; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1920; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1921; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1922; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1923; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1924; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1925; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1926; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1927; GFX10-WGP-NEXT: buffer_gl0_inv 1928; GFX10-WGP-NEXT: buffer_gl1_inv 1929; GFX10-WGP-NEXT: s_endpgm 1930; 1931; GFX10-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1932; GFX10-CU: ; %bb.0: ; %entry 1933; GFX10-CU-NEXT: s_clause 0x1 1934; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1935; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1936; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1937; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1938; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1939; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1940; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1941; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1942; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1943; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1944; GFX10-CU-NEXT: buffer_gl0_inv 1945; GFX10-CU-NEXT: buffer_gl1_inv 1946; GFX10-CU-NEXT: s_endpgm 1947; 1948; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1949; SKIP-CACHE-INV: ; %bb.0: ; %entry 1950; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1951; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1952; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1953; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1954; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1955; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1956; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1957; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1958; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1959; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1960; SKIP-CACHE-INV-NEXT: s_endpgm 1961; 1962; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1963; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1964; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1965; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1966; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1967; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1968; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1969; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1970; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1971; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1972; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1973; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 1974; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1975; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1976; 1977; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1978; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1979; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1980; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1981; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 1982; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1983; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1984; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1985; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1986; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 1987; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1988; GFX90A-TGSPLIT-NEXT: buffer_invl2 1989; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1990; GFX90A-TGSPLIT-NEXT: s_endpgm 1991 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1992entry: 1993 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1994 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 
%in acq_rel monotonic 1995 ret void 1996} 1997 1998define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( 1999; GFX6-LABEL: global_system_seq_cst_monotonic_cmpxchg: 2000; GFX6: ; %bb.0: ; %entry 2001; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2002; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2003; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2004; GFX6-NEXT: s_mov_b32 s2, -1 2005; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2006; GFX6-NEXT: v_mov_b32_e32 v0, s4 2007; GFX6-NEXT: v_mov_b32_e32 v1, s5 2008; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2009; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2010; GFX6-NEXT: s_waitcnt vmcnt(0) 2011; GFX6-NEXT: buffer_wbinvl1 2012; GFX6-NEXT: s_endpgm 2013; 2014; GFX7-LABEL: global_system_seq_cst_monotonic_cmpxchg: 2015; GFX7: ; %bb.0: ; %entry 2016; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2017; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2018; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2019; GFX7-NEXT: s_add_u32 s0, s0, 16 2020; GFX7-NEXT: s_addc_u32 s1, s1, 0 2021; GFX7-NEXT: v_mov_b32_e32 v0, s0 2022; GFX7-NEXT: v_mov_b32_e32 v2, s2 2023; GFX7-NEXT: v_mov_b32_e32 v1, s1 2024; GFX7-NEXT: v_mov_b32_e32 v3, s3 2025; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2026; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2027; GFX7-NEXT: s_waitcnt vmcnt(0) 2028; GFX7-NEXT: buffer_wbinvl1_vol 2029; GFX7-NEXT: s_endpgm 2030; 2031; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg: 2032; GFX10-WGP: ; %bb.0: ; %entry 2033; GFX10-WGP-NEXT: s_clause 0x1 2034; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2035; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2036; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2037; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2038; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2039; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2040; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2041; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2042; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2043; GFX10-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 2044; GFX10-WGP-NEXT: buffer_gl0_inv 2045; GFX10-WGP-NEXT: buffer_gl1_inv 2046; GFX10-WGP-NEXT: s_endpgm 2047; 2048; GFX10-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg: 2049; GFX10-CU: ; %bb.0: ; %entry 2050; GFX10-CU-NEXT: s_clause 0x1 2051; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2052; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2053; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2054; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2055; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2056; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2057; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2058; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2059; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2060; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2061; GFX10-CU-NEXT: buffer_gl0_inv 2062; GFX10-CU-NEXT: buffer_gl1_inv 2063; GFX10-CU-NEXT: s_endpgm 2064; 2065; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg: 2066; SKIP-CACHE-INV: ; %bb.0: ; %entry 2067; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2068; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2069; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2070; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2071; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2072; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2073; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2074; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2075; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2076; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2077; SKIP-CACHE-INV-NEXT: s_endpgm 2078; 2079; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: 2080; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2081; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2082; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2083; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2084; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2085; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2086; 
GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2087; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2088; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2089; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2090; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2091; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2092; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2093; 2094; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: 2095; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2096; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2097; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2098; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2099; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2100; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2101; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2102; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2103; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2104; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2105; GFX90A-TGSPLIT-NEXT: buffer_invl2 2106; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2107; GFX90A-TGSPLIT-NEXT: s_endpgm 2108 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2109entry: 2110 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2111 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst monotonic 2112 ret void 2113} 2114 2115define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( 2116; GFX6-LABEL: global_system_acquire_acquire_cmpxchg: 2117; GFX6: ; %bb.0: ; %entry 2118; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2119; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2120; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2121; GFX6-NEXT: s_mov_b32 s2, -1 2122; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2123; GFX6-NEXT: v_mov_b32_e32 v0, s4 2124; GFX6-NEXT: v_mov_b32_e32 v1, s5 2125; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2126; GFX6-NEXT: s_waitcnt vmcnt(0) 2127; GFX6-NEXT: buffer_wbinvl1 2128; GFX6-NEXT: s_endpgm 2129; 2130; GFX7-LABEL: 
global_system_acquire_acquire_cmpxchg: 2131; GFX7: ; %bb.0: ; %entry 2132; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2133; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2134; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2135; GFX7-NEXT: s_add_u32 s0, s0, 16 2136; GFX7-NEXT: s_addc_u32 s1, s1, 0 2137; GFX7-NEXT: v_mov_b32_e32 v0, s0 2138; GFX7-NEXT: v_mov_b32_e32 v2, s2 2139; GFX7-NEXT: v_mov_b32_e32 v1, s1 2140; GFX7-NEXT: v_mov_b32_e32 v3, s3 2141; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2142; GFX7-NEXT: s_waitcnt vmcnt(0) 2143; GFX7-NEXT: buffer_wbinvl1_vol 2144; GFX7-NEXT: s_endpgm 2145; 2146; GFX10-WGP-LABEL: global_system_acquire_acquire_cmpxchg: 2147; GFX10-WGP: ; %bb.0: ; %entry 2148; GFX10-WGP-NEXT: s_clause 0x1 2149; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2150; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2151; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2152; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2153; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2154; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2155; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2156; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2157; GFX10-WGP-NEXT: buffer_gl0_inv 2158; GFX10-WGP-NEXT: buffer_gl1_inv 2159; GFX10-WGP-NEXT: s_endpgm 2160; 2161; GFX10-CU-LABEL: global_system_acquire_acquire_cmpxchg: 2162; GFX10-CU: ; %bb.0: ; %entry 2163; GFX10-CU-NEXT: s_clause 0x1 2164; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2165; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2166; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2167; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2168; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2169; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2170; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2171; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2172; GFX10-CU-NEXT: buffer_gl0_inv 2173; GFX10-CU-NEXT: buffer_gl1_inv 2174; GFX10-CU-NEXT: s_endpgm 2175; 2176; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg: 2177; SKIP-CACHE-INV: ; %bb.0: ; %entry 2178; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2179; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2180; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2181; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2182; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2183; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2184; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2185; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2186; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2187; SKIP-CACHE-INV-NEXT: s_endpgm 2188; 2189; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: 2190; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2191; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2192; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2193; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2194; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2195; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2196; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2197; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2198; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2199; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2200; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2201; 2202; GFX90A-TGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: 2203; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2204; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2205; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2206; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2207; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2208; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2209; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2210; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2211; GFX90A-TGSPLIT-NEXT: buffer_invl2 2212; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2213; GFX90A-TGSPLIT-NEXT: s_endpgm 2214 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2215entry: 2216 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2217 %val = 
cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire acquire 2218 ret void 2219} 2220 2221define amdgpu_kernel void @global_system_release_acquire_cmpxchg( 2222; GFX6-LABEL: global_system_release_acquire_cmpxchg: 2223; GFX6: ; %bb.0: ; %entry 2224; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2225; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2226; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2227; GFX6-NEXT: s_mov_b32 s2, -1 2228; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2229; GFX6-NEXT: v_mov_b32_e32 v0, s4 2230; GFX6-NEXT: v_mov_b32_e32 v1, s5 2231; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2232; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2233; GFX6-NEXT: s_waitcnt vmcnt(0) 2234; GFX6-NEXT: buffer_wbinvl1 2235; GFX6-NEXT: s_endpgm 2236; 2237; GFX7-LABEL: global_system_release_acquire_cmpxchg: 2238; GFX7: ; %bb.0: ; %entry 2239; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2240; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2241; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2242; GFX7-NEXT: s_add_u32 s0, s0, 16 2243; GFX7-NEXT: s_addc_u32 s1, s1, 0 2244; GFX7-NEXT: v_mov_b32_e32 v0, s0 2245; GFX7-NEXT: v_mov_b32_e32 v2, s2 2246; GFX7-NEXT: v_mov_b32_e32 v1, s1 2247; GFX7-NEXT: v_mov_b32_e32 v3, s3 2248; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2249; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2250; GFX7-NEXT: s_waitcnt vmcnt(0) 2251; GFX7-NEXT: buffer_wbinvl1_vol 2252; GFX7-NEXT: s_endpgm 2253; 2254; GFX10-WGP-LABEL: global_system_release_acquire_cmpxchg: 2255; GFX10-WGP: ; %bb.0: ; %entry 2256; GFX10-WGP-NEXT: s_clause 0x1 2257; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2258; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2259; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2260; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2261; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2262; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2263; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2264; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2265; GFX10-WGP-NEXT: global_atomic_cmpswap v2, 
v[0:1], s[2:3] offset:16 2266; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2267; GFX10-WGP-NEXT: buffer_gl0_inv 2268; GFX10-WGP-NEXT: buffer_gl1_inv 2269; GFX10-WGP-NEXT: s_endpgm 2270; 2271; GFX10-CU-LABEL: global_system_release_acquire_cmpxchg: 2272; GFX10-CU: ; %bb.0: ; %entry 2273; GFX10-CU-NEXT: s_clause 0x1 2274; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2275; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2276; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2277; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2278; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2279; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2280; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2281; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2282; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2283; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2284; GFX10-CU-NEXT: buffer_gl0_inv 2285; GFX10-CU-NEXT: buffer_gl1_inv 2286; GFX10-CU-NEXT: s_endpgm 2287; 2288; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg: 2289; SKIP-CACHE-INV: ; %bb.0: ; %entry 2290; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2291; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2292; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2293; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2294; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2295; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2296; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2297; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2298; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2299; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2300; SKIP-CACHE-INV-NEXT: s_endpgm 2301; 2302; GFX90A-NOTTGSPLIT-LABEL: global_system_release_acquire_cmpxchg: 2303; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2304; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2305; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2306; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2307; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2308; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 
v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2309; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2310; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2311; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2312; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2313; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2314; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2315; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2316; 2317; GFX90A-TGSPLIT-LABEL: global_system_release_acquire_cmpxchg: 2318; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2319; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2320; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2321; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2322; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2323; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2324; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2325; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2326; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2327; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2328; GFX90A-TGSPLIT-NEXT: buffer_invl2 2329; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2330; GFX90A-TGSPLIT-NEXT: s_endpgm 2331 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2332entry: 2333 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2334 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release acquire 2335 ret void 2336} 2337; Kernel under test - volatile cmpxchg on element 4 of %out in addrspace(1), success ordering acq_rel, failure ordering acquire, no syncscope given; the cmpxchg result is not used. 2338define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( 2339; GFX6-LABEL: global_system_acq_rel_acquire_cmpxchg: 2340; GFX6: ; %bb.0: ; %entry 2341; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2342; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2343; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2344; GFX6-NEXT: s_mov_b32 s2, -1 2345; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2346; GFX6-NEXT: v_mov_b32_e32 v0, s4 2347; GFX6-NEXT: v_mov_b32_e32 v1, s5 2348; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2349; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2350; GFX6-NEXT: s_waitcnt vmcnt(0) 2351; 
GFX6-NEXT: buffer_wbinvl1 2352; GFX6-NEXT: s_endpgm 2353; 2354; GFX7-LABEL: global_system_acq_rel_acquire_cmpxchg: 2355; GFX7: ; %bb.0: ; %entry 2356; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2357; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2358; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2359; GFX7-NEXT: s_add_u32 s0, s0, 16 2360; GFX7-NEXT: s_addc_u32 s1, s1, 0 2361; GFX7-NEXT: v_mov_b32_e32 v0, s0 2362; GFX7-NEXT: v_mov_b32_e32 v2, s2 2363; GFX7-NEXT: v_mov_b32_e32 v1, s1 2364; GFX7-NEXT: v_mov_b32_e32 v3, s3 2365; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2366; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2367; GFX7-NEXT: s_waitcnt vmcnt(0) 2368; GFX7-NEXT: buffer_wbinvl1_vol 2369; GFX7-NEXT: s_endpgm 2370; 2371; GFX10-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg: 2372; GFX10-WGP: ; %bb.0: ; %entry 2373; GFX10-WGP-NEXT: s_clause 0x1 2374; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2375; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2376; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2377; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2378; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2379; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2380; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2381; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2382; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2383; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2384; GFX10-WGP-NEXT: buffer_gl0_inv 2385; GFX10-WGP-NEXT: buffer_gl1_inv 2386; GFX10-WGP-NEXT: s_endpgm 2387; 2388; GFX10-CU-LABEL: global_system_acq_rel_acquire_cmpxchg: 2389; GFX10-CU: ; %bb.0: ; %entry 2390; GFX10-CU-NEXT: s_clause 0x1 2391; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2392; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2393; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2394; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2395; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2396; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2397; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2398; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2399; GFX10-CU-NEXT: 
global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2400; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2401; GFX10-CU-NEXT: buffer_gl0_inv 2402; GFX10-CU-NEXT: buffer_gl1_inv 2403; GFX10-CU-NEXT: s_endpgm 2404; 2405; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg: 2406; SKIP-CACHE-INV: ; %bb.0: ; %entry 2407; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2408; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2409; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2410; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2411; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2412; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2413; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2414; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2415; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2416; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2417; SKIP-CACHE-INV-NEXT: s_endpgm 2418; 2419; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: 2420; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2421; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2422; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2423; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2424; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2425; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2426; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2427; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2428; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2429; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2430; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2431; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2432; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2433; 2434; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: 2435; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2436; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2437; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2438; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2439; GFX90A-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 2440; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2441; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2442; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2443; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2444; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2445; GFX90A-TGSPLIT-NEXT: buffer_invl2 2446; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2447; GFX90A-TGSPLIT-NEXT: s_endpgm 2448 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2449entry: 2450 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2451 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel acquire 2452 ret void 2453} 2454; Kernel under test - volatile cmpxchg on element 4 of %out in addrspace(1), success ordering seq_cst, failure ordering acquire, no syncscope given; the cmpxchg result is not used. 2455define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( 2456; GFX6-LABEL: global_system_seq_cst_acquire_cmpxchg: 2457; GFX6: ; %bb.0: ; %entry 2458; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2459; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2460; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2461; GFX6-NEXT: s_mov_b32 s2, -1 2462; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2463; GFX6-NEXT: v_mov_b32_e32 v0, s4 2464; GFX6-NEXT: v_mov_b32_e32 v1, s5 2465; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2466; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2467; GFX6-NEXT: s_waitcnt vmcnt(0) 2468; GFX6-NEXT: buffer_wbinvl1 2469; GFX6-NEXT: s_endpgm 2470; 2471; GFX7-LABEL: global_system_seq_cst_acquire_cmpxchg: 2472; GFX7: ; %bb.0: ; %entry 2473; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2474; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2475; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2476; GFX7-NEXT: s_add_u32 s0, s0, 16 2477; GFX7-NEXT: s_addc_u32 s1, s1, 0 2478; GFX7-NEXT: v_mov_b32_e32 v0, s0 2479; GFX7-NEXT: v_mov_b32_e32 v2, s2 2480; GFX7-NEXT: v_mov_b32_e32 v1, s1 2481; GFX7-NEXT: v_mov_b32_e32 v3, s3 2482; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2483; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2484; GFX7-NEXT: s_waitcnt vmcnt(0) 2485; GFX7-NEXT: buffer_wbinvl1_vol 2486; GFX7-NEXT: s_endpgm 
2487; 2488; GFX10-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg: 2489; GFX10-WGP: ; %bb.0: ; %entry 2490; GFX10-WGP-NEXT: s_clause 0x1 2491; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2492; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2493; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2494; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2495; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2496; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2497; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2498; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2499; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2500; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2501; GFX10-WGP-NEXT: buffer_gl0_inv 2502; GFX10-WGP-NEXT: buffer_gl1_inv 2503; GFX10-WGP-NEXT: s_endpgm 2504; 2505; GFX10-CU-LABEL: global_system_seq_cst_acquire_cmpxchg: 2506; GFX10-CU: ; %bb.0: ; %entry 2507; GFX10-CU-NEXT: s_clause 0x1 2508; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2509; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2510; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2511; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2512; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2513; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2514; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2515; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2516; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2517; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2518; GFX10-CU-NEXT: buffer_gl0_inv 2519; GFX10-CU-NEXT: buffer_gl1_inv 2520; GFX10-CU-NEXT: s_endpgm 2521; 2522; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg: 2523; SKIP-CACHE-INV: ; %bb.0: ; %entry 2524; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2525; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2526; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2527; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2528; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2529; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2530; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2531; SKIP-CACHE-INV-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 2532; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2533; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2534; SKIP-CACHE-INV-NEXT: s_endpgm 2535; 2536; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: 2537; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2538; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2539; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2540; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2541; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2542; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2543; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2544; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2545; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2546; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2547; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2548; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2549; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2550; 2551; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: 2552; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2553; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2554; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2555; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2556; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2557; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2558; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2559; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2560; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2561; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2562; GFX90A-TGSPLIT-NEXT: buffer_invl2 2563; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2564; GFX90A-TGSPLIT-NEXT: s_endpgm 2565 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2566entry: 2567 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2568 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst acquire 2569 ret void 2570} 2571; Kernel under test - volatile cmpxchg on element 4 of %out in addrspace(1), seq_cst for both the success and the failure ordering, no syncscope given; the cmpxchg result is not used. 2572define amdgpu_kernel void 
@global_system_seq_cst_seq_cst_cmpxchg( 2573; GFX6-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2574; GFX6: ; %bb.0: ; %entry 2575; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2576; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2577; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2578; GFX6-NEXT: s_mov_b32 s2, -1 2579; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2580; GFX6-NEXT: v_mov_b32_e32 v0, s4 2581; GFX6-NEXT: v_mov_b32_e32 v1, s5 2582; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2583; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2584; GFX6-NEXT: s_waitcnt vmcnt(0) 2585; GFX6-NEXT: buffer_wbinvl1 2586; GFX6-NEXT: s_endpgm 2587; 2588; GFX7-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2589; GFX7: ; %bb.0: ; %entry 2590; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2591; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2592; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2593; GFX7-NEXT: s_add_u32 s0, s0, 16 2594; GFX7-NEXT: s_addc_u32 s1, s1, 0 2595; GFX7-NEXT: v_mov_b32_e32 v0, s0 2596; GFX7-NEXT: v_mov_b32_e32 v2, s2 2597; GFX7-NEXT: v_mov_b32_e32 v1, s1 2598; GFX7-NEXT: v_mov_b32_e32 v3, s3 2599; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2600; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2601; GFX7-NEXT: s_waitcnt vmcnt(0) 2602; GFX7-NEXT: buffer_wbinvl1_vol 2603; GFX7-NEXT: s_endpgm 2604; 2605; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2606; GFX10-WGP: ; %bb.0: ; %entry 2607; GFX10-WGP-NEXT: s_clause 0x1 2608; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2609; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2610; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2611; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2612; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2613; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2614; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2615; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2616; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2617; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2618; GFX10-WGP-NEXT: buffer_gl0_inv 2619; GFX10-WGP-NEXT: 
buffer_gl1_inv 2620; GFX10-WGP-NEXT: s_endpgm 2621; 2622; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2623; GFX10-CU: ; %bb.0: ; %entry 2624; GFX10-CU-NEXT: s_clause 0x1 2625; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2626; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2627; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2628; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2629; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2630; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2631; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2632; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2633; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2634; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2635; GFX10-CU-NEXT: buffer_gl0_inv 2636; GFX10-CU-NEXT: buffer_gl1_inv 2637; GFX10-CU-NEXT: s_endpgm 2638; 2639; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2640; SKIP-CACHE-INV: ; %bb.0: ; %entry 2641; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2642; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2643; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2644; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2645; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2646; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2647; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2648; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2649; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2650; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2651; SKIP-CACHE-INV-NEXT: s_endpgm 2652; 2653; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2654; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2655; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2656; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2657; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2658; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2659; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2660; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2661; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 2662; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2663; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2664; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2665; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2666; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2667; 2668; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2669; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2670; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2671; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2672; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2673; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2674; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2675; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2676; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2677; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 2678; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2679; GFX90A-TGSPLIT-NEXT: buffer_invl2 2680; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2681; GFX90A-TGSPLIT-NEXT: s_endpgm 2682 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2683entry: 2684 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2685 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 2686 ret void 2687} 2688; Kernel under test - volatile cmpxchg on element 4 of %out in addrspace(1), success ordering acquire, failure ordering monotonic, no syncscope given; element 0 of the { i32, i1 } result is then stored to %out. 2689define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( 2690; GFX6-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2691; GFX6: ; %bb.0: ; %entry 2692; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2693; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2694; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2695; GFX6-NEXT: s_mov_b32 s2, -1 2696; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2697; GFX6-NEXT: v_mov_b32_e32 v0, s4 2698; GFX6-NEXT: v_mov_b32_e32 v1, s5 2699; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 2700; GFX6-NEXT: s_waitcnt vmcnt(0) 2701; GFX6-NEXT: buffer_wbinvl1 2702; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 2703; GFX6-NEXT: s_endpgm 2704; 2705; GFX7-LABEL: 
global_system_acquire_monotonic_ret_cmpxchg: 2706; GFX7: ; %bb.0: ; %entry 2707; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2708; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2709; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2710; GFX7-NEXT: s_add_u32 s4, s0, 16 2711; GFX7-NEXT: s_addc_u32 s5, s1, 0 2712; GFX7-NEXT: v_mov_b32_e32 v0, s4 2713; GFX7-NEXT: v_mov_b32_e32 v2, s2 2714; GFX7-NEXT: v_mov_b32_e32 v1, s5 2715; GFX7-NEXT: v_mov_b32_e32 v3, s3 2716; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2717; GFX7-NEXT: s_waitcnt vmcnt(0) 2718; GFX7-NEXT: buffer_wbinvl1_vol 2719; GFX7-NEXT: v_mov_b32_e32 v0, s0 2720; GFX7-NEXT: v_mov_b32_e32 v1, s1 2721; GFX7-NEXT: flat_store_dword v[0:1], v2 2722; GFX7-NEXT: s_endpgm 2723; 2724; GFX10-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2725; GFX10-WGP: ; %bb.0: ; %entry 2726; GFX10-WGP-NEXT: s_clause 0x1 2727; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2728; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2729; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2730; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2731; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2732; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2733; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2734; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2735; GFX10-WGP-NEXT: buffer_gl0_inv 2736; GFX10-WGP-NEXT: buffer_gl1_inv 2737; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2738; GFX10-WGP-NEXT: s_endpgm 2739; 2740; GFX10-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2741; GFX10-CU: ; %bb.0: ; %entry 2742; GFX10-CU-NEXT: s_clause 0x1 2743; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2744; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2745; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2746; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2747; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2748; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2749; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2750; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2751; GFX10-CU-NEXT: 
buffer_gl0_inv 2752; GFX10-CU-NEXT: buffer_gl1_inv 2753; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2754; GFX10-CU-NEXT: s_endpgm 2755; 2756; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2757; SKIP-CACHE-INV: ; %bb.0: ; %entry 2758; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2759; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2760; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2761; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2762; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2763; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2764; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2765; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2766; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2767; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2768; SKIP-CACHE-INV-NEXT: s_endpgm 2769; 2770; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2771; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2772; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2773; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2774; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2775; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2776; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2777; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2778; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2779; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2780; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2781; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2782; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2783; 2784; GFX90A-TGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2785; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2786; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2787; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2788; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2789; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2790; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 
v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2791; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2792; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2793; GFX90A-TGSPLIT-NEXT: buffer_invl2 2794; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2795; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2796; GFX90A-TGSPLIT-NEXT: s_endpgm 2797 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2798entry: 2799 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2800 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire monotonic 2801 %val0 = extractvalue { i32, i1 } %val, 0 2802 store i32 %val0, i32 addrspace(1)* %out, align 4 2803 ret void 2804} 2805; Kernel under test - volatile cmpxchg on element 4 of %out in addrspace(1), success ordering acq_rel, failure ordering monotonic, no syncscope given; element 0 of the { i32, i1 } result is then stored to %out. 2806define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( 2807; GFX6-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2808; GFX6: ; %bb.0: ; %entry 2809; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2810; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2811; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2812; GFX6-NEXT: s_mov_b32 s2, -1 2813; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2814; GFX6-NEXT: v_mov_b32_e32 v0, s4 2815; GFX6-NEXT: v_mov_b32_e32 v1, s5 2816; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2817; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 2818; GFX6-NEXT: s_waitcnt vmcnt(0) 2819; GFX6-NEXT: buffer_wbinvl1 2820; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 2821; GFX6-NEXT: s_endpgm 2822; 2823; GFX7-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2824; GFX7: ; %bb.0: ; %entry 2825; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2826; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2827; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2828; GFX7-NEXT: s_add_u32 s4, s0, 16 2829; GFX7-NEXT: s_addc_u32 s5, s1, 0 2830; GFX7-NEXT: v_mov_b32_e32 v0, s4 2831; GFX7-NEXT: v_mov_b32_e32 v2, s2 2832; GFX7-NEXT: v_mov_b32_e32 v1, s5 2833; GFX7-NEXT: v_mov_b32_e32 v3, s3 2834; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2835; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 
2836; GFX7-NEXT: s_waitcnt vmcnt(0) 2837; GFX7-NEXT: buffer_wbinvl1_vol 2838; GFX7-NEXT: v_mov_b32_e32 v0, s0 2839; GFX7-NEXT: v_mov_b32_e32 v1, s1 2840; GFX7-NEXT: flat_store_dword v[0:1], v2 2841; GFX7-NEXT: s_endpgm 2842; 2843; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2844; GFX10-WGP: ; %bb.0: ; %entry 2845; GFX10-WGP-NEXT: s_clause 0x1 2846; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2847; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2848; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2849; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2850; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2851; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2852; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2853; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2854; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2855; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2856; GFX10-WGP-NEXT: buffer_gl0_inv 2857; GFX10-WGP-NEXT: buffer_gl1_inv 2858; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2859; GFX10-WGP-NEXT: s_endpgm 2860; 2861; GFX10-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2862; GFX10-CU: ; %bb.0: ; %entry 2863; GFX10-CU-NEXT: s_clause 0x1 2864; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2865; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2866; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2867; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2868; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2869; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2870; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2871; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2872; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2873; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2874; GFX10-CU-NEXT: buffer_gl0_inv 2875; GFX10-CU-NEXT: buffer_gl1_inv 2876; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2877; GFX10-CU-NEXT: s_endpgm 2878; 2879; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2880; SKIP-CACHE-INV: ; %bb.0: ; %entry 2881; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], 
s[0:1], 0x9 2882; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2883; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2884; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2885; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2886; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2887; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2888; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2889; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2890; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2891; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2892; SKIP-CACHE-INV-NEXT: s_endpgm 2893; 2894; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2895; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2896; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2897; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2898; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2899; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2900; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2901; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2902; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2903; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2904; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2905; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2906; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2907; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2908; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2909; 2910; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2911; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2912; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2913; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 2914; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 2915; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2916; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 2917; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2918; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2919; 
GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 2920; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2921; GFX90A-TGSPLIT-NEXT: buffer_invl2 2922; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2923; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 2924; GFX90A-TGSPLIT-NEXT: s_endpgm 2925 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2926entry: 2927 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2928 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel monotonic 2929 %val0 = extractvalue { i32, i1 } %val, 0 2930 store i32 %val0, i32 addrspace(1)* %out, align 4 2931 ret void 2932} 2933; Kernel under test - volatile cmpxchg on element 4 of %out in addrspace(1), success ordering seq_cst, failure ordering monotonic, no syncscope given; element 0 of the { i32, i1 } result is then stored to %out. 2934define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( 2935; GFX6-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2936; GFX6: ; %bb.0: ; %entry 2937; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2938; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 2939; GFX6-NEXT: s_mov_b32 s3, 0x100f000 2940; GFX6-NEXT: s_mov_b32 s2, -1 2941; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2942; GFX6-NEXT: v_mov_b32_e32 v0, s4 2943; GFX6-NEXT: v_mov_b32_e32 v1, s5 2944; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2945; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 2946; GFX6-NEXT: s_waitcnt vmcnt(0) 2947; GFX6-NEXT: buffer_wbinvl1 2948; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 2949; GFX6-NEXT: s_endpgm 2950; 2951; GFX7-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2952; GFX7: ; %bb.0: ; %entry 2953; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2954; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2955; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2956; GFX7-NEXT: s_add_u32 s4, s0, 16 2957; GFX7-NEXT: s_addc_u32 s5, s1, 0 2958; GFX7-NEXT: v_mov_b32_e32 v0, s4 2959; GFX7-NEXT: v_mov_b32_e32 v2, s2 2960; GFX7-NEXT: v_mov_b32_e32 v1, s5 2961; GFX7-NEXT: v_mov_b32_e32 v3, s3 2962; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2963; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2964; GFX7-NEXT: s_waitcnt vmcnt(0) 2965; 
GFX7-NEXT: buffer_wbinvl1_vol 2966; GFX7-NEXT: v_mov_b32_e32 v0, s0 2967; GFX7-NEXT: v_mov_b32_e32 v1, s1 2968; GFX7-NEXT: flat_store_dword v[0:1], v2 2969; GFX7-NEXT: s_endpgm 2970; 2971; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2972; GFX10-WGP: ; %bb.0: ; %entry 2973; GFX10-WGP-NEXT: s_clause 0x1 2974; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2975; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2976; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2977; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2978; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2979; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2980; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2981; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2982; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2983; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2984; GFX10-WGP-NEXT: buffer_gl0_inv 2985; GFX10-WGP-NEXT: buffer_gl1_inv 2986; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2987; GFX10-WGP-NEXT: s_endpgm 2988; 2989; GFX10-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2990; GFX10-CU: ; %bb.0: ; %entry 2991; GFX10-CU-NEXT: s_clause 0x1 2992; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2993; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2994; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2995; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2996; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2997; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2998; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2999; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3000; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3001; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3002; GFX10-CU-NEXT: buffer_gl0_inv 3003; GFX10-CU-NEXT: buffer_gl1_inv 3004; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3005; GFX10-CU-NEXT: s_endpgm 3006; 3007; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 3008; SKIP-CACHE-INV: ; %bb.0: ; %entry 3009; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3010; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb 3011; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3012; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3013; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3014; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3015; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3016; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3017; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3018; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3019; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3020; SKIP-CACHE-INV-NEXT: s_endpgm 3021; 3022; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 3023; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3024; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3025; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3026; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3027; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3028; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3029; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3030; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3031; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3032; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3033; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3034; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3035; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3036; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3037; 3038; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 3039; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3040; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3041; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3042; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3043; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3044; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3045; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3046; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3047; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, 
v[0:1], s[0:1] offset:16 glc 3048; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3049; GFX90A-TGSPLIT-NEXT: buffer_invl2 3050; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3051; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3052; GFX90A-TGSPLIT-NEXT: s_endpgm 3053 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3054entry: 3055 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3056 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst monotonic 3057 %val0 = extractvalue { i32, i1 } %val, 0 3058 store i32 %val0, i32 addrspace(1)* %out, align 4 3059 ret void 3060} 3061 3062define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( 3063; GFX6-LABEL: global_system_acquire_acquire_ret_cmpxchg: 3064; GFX6: ; %bb.0: ; %entry 3065; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3066; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3067; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3068; GFX6-NEXT: s_mov_b32 s2, -1 3069; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3070; GFX6-NEXT: v_mov_b32_e32 v0, s4 3071; GFX6-NEXT: v_mov_b32_e32 v1, s5 3072; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3073; GFX6-NEXT: s_waitcnt vmcnt(0) 3074; GFX6-NEXT: buffer_wbinvl1 3075; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3076; GFX6-NEXT: s_endpgm 3077; 3078; GFX7-LABEL: global_system_acquire_acquire_ret_cmpxchg: 3079; GFX7: ; %bb.0: ; %entry 3080; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3081; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3082; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3083; GFX7-NEXT: s_add_u32 s4, s0, 16 3084; GFX7-NEXT: s_addc_u32 s5, s1, 0 3085; GFX7-NEXT: v_mov_b32_e32 v0, s4 3086; GFX7-NEXT: v_mov_b32_e32 v2, s2 3087; GFX7-NEXT: v_mov_b32_e32 v1, s5 3088; GFX7-NEXT: v_mov_b32_e32 v3, s3 3089; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3090; GFX7-NEXT: s_waitcnt vmcnt(0) 3091; GFX7-NEXT: buffer_wbinvl1_vol 3092; GFX7-NEXT: v_mov_b32_e32 v0, s0 3093; GFX7-NEXT: v_mov_b32_e32 v1, s1 3094; GFX7-NEXT: flat_store_dword v[0:1], v2 
3095; GFX7-NEXT: s_endpgm 3096; 3097; GFX10-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg: 3098; GFX10-WGP: ; %bb.0: ; %entry 3099; GFX10-WGP-NEXT: s_clause 0x1 3100; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3101; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3102; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3103; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3104; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3105; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3106; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3107; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3108; GFX10-WGP-NEXT: buffer_gl0_inv 3109; GFX10-WGP-NEXT: buffer_gl1_inv 3110; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3111; GFX10-WGP-NEXT: s_endpgm 3112; 3113; GFX10-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg: 3114; GFX10-CU: ; %bb.0: ; %entry 3115; GFX10-CU-NEXT: s_clause 0x1 3116; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3117; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3118; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3119; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3120; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3121; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3122; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3123; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3124; GFX10-CU-NEXT: buffer_gl0_inv 3125; GFX10-CU-NEXT: buffer_gl1_inv 3126; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3127; GFX10-CU-NEXT: s_endpgm 3128; 3129; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_ret_cmpxchg: 3130; SKIP-CACHE-INV: ; %bb.0: ; %entry 3131; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3132; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3133; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3134; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3135; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3136; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3137; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3138; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3139; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3140; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3141; SKIP-CACHE-INV-NEXT: s_endpgm 3142; 3143; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: 3144; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3145; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3146; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3147; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3148; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3149; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3150; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3151; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3152; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3153; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3154; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3155; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3156; 3157; GFX90A-TGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: 3158; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3159; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3160; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3161; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3162; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3163; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3164; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3165; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3166; GFX90A-TGSPLIT-NEXT: buffer_invl2 3167; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3168; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3169; GFX90A-TGSPLIT-NEXT: s_endpgm 3170 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3171entry: 3172 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3173 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire acquire 3174 %val0 = extractvalue { i32, i1 } %val, 0 3175 store i32 %val0, i32 addrspace(1)* %out, align 4 3176 ret void 3177} 3178 3179define amdgpu_kernel void 
@global_system_release_acquire_ret_cmpxchg( 3180; GFX6-LABEL: global_system_release_acquire_ret_cmpxchg: 3181; GFX6: ; %bb.0: ; %entry 3182; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3183; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3184; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3185; GFX6-NEXT: s_mov_b32 s2, -1 3186; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3187; GFX6-NEXT: v_mov_b32_e32 v0, s4 3188; GFX6-NEXT: v_mov_b32_e32 v1, s5 3189; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3190; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3191; GFX6-NEXT: s_waitcnt vmcnt(0) 3192; GFX6-NEXT: buffer_wbinvl1 3193; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3194; GFX6-NEXT: s_endpgm 3195; 3196; GFX7-LABEL: global_system_release_acquire_ret_cmpxchg: 3197; GFX7: ; %bb.0: ; %entry 3198; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3199; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3200; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3201; GFX7-NEXT: s_add_u32 s4, s0, 16 3202; GFX7-NEXT: s_addc_u32 s5, s1, 0 3203; GFX7-NEXT: v_mov_b32_e32 v0, s4 3204; GFX7-NEXT: v_mov_b32_e32 v2, s2 3205; GFX7-NEXT: v_mov_b32_e32 v1, s5 3206; GFX7-NEXT: v_mov_b32_e32 v3, s3 3207; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3208; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3209; GFX7-NEXT: s_waitcnt vmcnt(0) 3210; GFX7-NEXT: buffer_wbinvl1_vol 3211; GFX7-NEXT: v_mov_b32_e32 v0, s0 3212; GFX7-NEXT: v_mov_b32_e32 v1, s1 3213; GFX7-NEXT: flat_store_dword v[0:1], v2 3214; GFX7-NEXT: s_endpgm 3215; 3216; GFX10-WGP-LABEL: global_system_release_acquire_ret_cmpxchg: 3217; GFX10-WGP: ; %bb.0: ; %entry 3218; GFX10-WGP-NEXT: s_clause 0x1 3219; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3220; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3221; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3222; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3223; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3224; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3225; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3226; GFX10-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 3227; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3228; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3229; GFX10-WGP-NEXT: buffer_gl0_inv 3230; GFX10-WGP-NEXT: buffer_gl1_inv 3231; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3232; GFX10-WGP-NEXT: s_endpgm 3233; 3234; GFX10-CU-LABEL: global_system_release_acquire_ret_cmpxchg: 3235; GFX10-CU: ; %bb.0: ; %entry 3236; GFX10-CU-NEXT: s_clause 0x1 3237; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3238; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3239; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3240; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3241; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3242; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3243; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3244; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3245; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3246; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3247; GFX10-CU-NEXT: buffer_gl0_inv 3248; GFX10-CU-NEXT: buffer_gl1_inv 3249; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3250; GFX10-CU-NEXT: s_endpgm 3251; 3252; SKIP-CACHE-INV-LABEL: global_system_release_acquire_ret_cmpxchg: 3253; SKIP-CACHE-INV: ; %bb.0: ; %entry 3254; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3255; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3256; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3257; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3258; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3259; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3260; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3261; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3262; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3263; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3264; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3265; SKIP-CACHE-INV-NEXT: s_endpgm 3266; 3267; GFX90A-NOTTGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: 3268; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3269; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3270; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3271; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3272; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3273; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3274; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3275; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3276; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3277; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3278; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3279; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3280; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3281; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3282; 3283; GFX90A-TGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: 3284; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3285; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3286; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3287; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3288; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3289; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3290; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3291; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3292; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3293; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3294; GFX90A-TGSPLIT-NEXT: buffer_invl2 3295; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3296; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3297; GFX90A-TGSPLIT-NEXT: s_endpgm 3298 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3299entry: 3300 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3301 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release acquire 3302 %val0 = extractvalue { i32, i1 } %val, 0 3303 store i32 %val0, i32 addrspace(1)* %out, align 4 3304 ret void 3305} 3306 3307define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( 3308; GFX6-LABEL: 
global_system_acq_rel_acquire_ret_cmpxchg: 3309; GFX6: ; %bb.0: ; %entry 3310; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3311; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3312; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3313; GFX6-NEXT: s_mov_b32 s2, -1 3314; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3315; GFX6-NEXT: v_mov_b32_e32 v0, s4 3316; GFX6-NEXT: v_mov_b32_e32 v1, s5 3317; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3318; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3319; GFX6-NEXT: s_waitcnt vmcnt(0) 3320; GFX6-NEXT: buffer_wbinvl1 3321; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3322; GFX6-NEXT: s_endpgm 3323; 3324; GFX7-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 3325; GFX7: ; %bb.0: ; %entry 3326; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3327; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3328; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3329; GFX7-NEXT: s_add_u32 s4, s0, 16 3330; GFX7-NEXT: s_addc_u32 s5, s1, 0 3331; GFX7-NEXT: v_mov_b32_e32 v0, s4 3332; GFX7-NEXT: v_mov_b32_e32 v2, s2 3333; GFX7-NEXT: v_mov_b32_e32 v1, s5 3334; GFX7-NEXT: v_mov_b32_e32 v3, s3 3335; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3336; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3337; GFX7-NEXT: s_waitcnt vmcnt(0) 3338; GFX7-NEXT: buffer_wbinvl1_vol 3339; GFX7-NEXT: v_mov_b32_e32 v0, s0 3340; GFX7-NEXT: v_mov_b32_e32 v1, s1 3341; GFX7-NEXT: flat_store_dword v[0:1], v2 3342; GFX7-NEXT: s_endpgm 3343; 3344; GFX10-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 3345; GFX10-WGP: ; %bb.0: ; %entry 3346; GFX10-WGP-NEXT: s_clause 0x1 3347; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3348; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3349; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3350; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3351; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3352; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3353; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3354; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3355; GFX10-WGP-NEXT: 
global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3356; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3357; GFX10-WGP-NEXT: buffer_gl0_inv 3358; GFX10-WGP-NEXT: buffer_gl1_inv 3359; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3360; GFX10-WGP-NEXT: s_endpgm 3361; 3362; GFX10-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 3363; GFX10-CU: ; %bb.0: ; %entry 3364; GFX10-CU-NEXT: s_clause 0x1 3365; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3366; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3367; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3368; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3369; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3370; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3371; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3372; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3373; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3374; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3375; GFX10-CU-NEXT: buffer_gl0_inv 3376; GFX10-CU-NEXT: buffer_gl1_inv 3377; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3378; GFX10-CU-NEXT: s_endpgm 3379; 3380; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 3381; SKIP-CACHE-INV: ; %bb.0: ; %entry 3382; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3383; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3384; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3385; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3386; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3387; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3388; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3389; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3390; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3391; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3392; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3393; SKIP-CACHE-INV-NEXT: s_endpgm 3394; 3395; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 3396; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3397; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 3398; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3399; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3400; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3401; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3402; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3403; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3404; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3405; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3406; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3407; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3408; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3409; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3410; 3411; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 3412; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3413; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3414; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3415; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3416; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3417; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3418; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3419; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3420; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3421; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3422; GFX90A-TGSPLIT-NEXT: buffer_invl2 3423; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3424; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3425; GFX90A-TGSPLIT-NEXT: s_endpgm 3426 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3427entry: 3428 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3429 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel acquire 3430 %val0 = extractvalue { i32, i1 } %val, 0 3431 store i32 %val0, i32 addrspace(1)* %out, align 4 3432 ret void 3433} 3434 3435define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( 3436; GFX6-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 3437; GFX6: ; 
%bb.0: ; %entry 3438; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3439; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3440; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3441; GFX6-NEXT: s_mov_b32 s2, -1 3442; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3443; GFX6-NEXT: v_mov_b32_e32 v0, s4 3444; GFX6-NEXT: v_mov_b32_e32 v1, s5 3445; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3446; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3447; GFX6-NEXT: s_waitcnt vmcnt(0) 3448; GFX6-NEXT: buffer_wbinvl1 3449; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3450; GFX6-NEXT: s_endpgm 3451; 3452; GFX7-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 3453; GFX7: ; %bb.0: ; %entry 3454; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3455; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3456; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3457; GFX7-NEXT: s_add_u32 s4, s0, 16 3458; GFX7-NEXT: s_addc_u32 s5, s1, 0 3459; GFX7-NEXT: v_mov_b32_e32 v0, s4 3460; GFX7-NEXT: v_mov_b32_e32 v2, s2 3461; GFX7-NEXT: v_mov_b32_e32 v1, s5 3462; GFX7-NEXT: v_mov_b32_e32 v3, s3 3463; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3464; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3465; GFX7-NEXT: s_waitcnt vmcnt(0) 3466; GFX7-NEXT: buffer_wbinvl1_vol 3467; GFX7-NEXT: v_mov_b32_e32 v0, s0 3468; GFX7-NEXT: v_mov_b32_e32 v1, s1 3469; GFX7-NEXT: flat_store_dword v[0:1], v2 3470; GFX7-NEXT: s_endpgm 3471; 3472; GFX10-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 3473; GFX10-WGP: ; %bb.0: ; %entry 3474; GFX10-WGP-NEXT: s_clause 0x1 3475; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3476; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3477; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3478; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3479; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3480; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3481; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3482; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3483; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3484; 
GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3485; GFX10-WGP-NEXT: buffer_gl0_inv 3486; GFX10-WGP-NEXT: buffer_gl1_inv 3487; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3488; GFX10-WGP-NEXT: s_endpgm 3489; 3490; GFX10-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 3491; GFX10-CU: ; %bb.0: ; %entry 3492; GFX10-CU-NEXT: s_clause 0x1 3493; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3494; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3495; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3496; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3497; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3498; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3499; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3500; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3501; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3502; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3503; GFX10-CU-NEXT: buffer_gl0_inv 3504; GFX10-CU-NEXT: buffer_gl1_inv 3505; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3506; GFX10-CU-NEXT: s_endpgm 3507; 3508; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 3509; SKIP-CACHE-INV: ; %bb.0: ; %entry 3510; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3511; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3512; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3513; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3514; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3515; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3516; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3517; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3518; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3519; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3520; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3521; SKIP-CACHE-INV-NEXT: s_endpgm 3522; 3523; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 3524; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3525; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3526; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], 
s[4:5], 0x8 3527; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3528; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3529; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3530; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3531; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3532; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3533; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3534; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3535; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3536; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3537; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3538; 3539; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 3540; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3541; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3542; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3543; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3544; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3545; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3546; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3547; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3548; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3549; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3550; GFX90A-TGSPLIT-NEXT: buffer_invl2 3551; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3552; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3553; GFX90A-TGSPLIT-NEXT: s_endpgm 3554 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3555entry: 3556 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3557 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst acquire 3558 %val0 = extractvalue { i32, i1 } %val, 0 3559 store i32 %val0, i32 addrspace(1)* %out, align 4 3560 ret void 3561} 3562 3563define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( 3564; GFX6-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 3565; GFX6: ; %bb.0: ; %entry 3566; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x0 3567; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 3568; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3569; GFX6-NEXT: s_mov_b32 s2, -1 3570; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3571; GFX6-NEXT: v_mov_b32_e32 v0, s4 3572; GFX6-NEXT: v_mov_b32_e32 v1, s5 3573; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3574; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 3575; GFX6-NEXT: s_waitcnt vmcnt(0) 3576; GFX6-NEXT: buffer_wbinvl1 3577; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3578; GFX6-NEXT: s_endpgm 3579; 3580; GFX7-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 3581; GFX7: ; %bb.0: ; %entry 3582; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3583; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3584; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3585; GFX7-NEXT: s_add_u32 s4, s0, 16 3586; GFX7-NEXT: s_addc_u32 s5, s1, 0 3587; GFX7-NEXT: v_mov_b32_e32 v0, s4 3588; GFX7-NEXT: v_mov_b32_e32 v2, s2 3589; GFX7-NEXT: v_mov_b32_e32 v1, s5 3590; GFX7-NEXT: v_mov_b32_e32 v3, s3 3591; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3592; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 3593; GFX7-NEXT: s_waitcnt vmcnt(0) 3594; GFX7-NEXT: buffer_wbinvl1_vol 3595; GFX7-NEXT: v_mov_b32_e32 v0, s0 3596; GFX7-NEXT: v_mov_b32_e32 v1, s1 3597; GFX7-NEXT: flat_store_dword v[0:1], v2 3598; GFX7-NEXT: s_endpgm 3599; 3600; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 3601; GFX10-WGP: ; %bb.0: ; %entry 3602; GFX10-WGP-NEXT: s_clause 0x1 3603; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3604; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3605; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 3606; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3607; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3608; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3609; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3610; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3611; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3612; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3613; GFX10-WGP-NEXT: buffer_gl0_inv 
3614; GFX10-WGP-NEXT: buffer_gl1_inv 3615; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 3616; GFX10-WGP-NEXT: s_endpgm 3617; 3618; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 3619; GFX10-CU: ; %bb.0: ; %entry 3620; GFX10-CU-NEXT: s_clause 0x1 3621; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3622; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3623; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 3624; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3625; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3626; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3627; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3628; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3629; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 3630; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3631; GFX10-CU-NEXT: buffer_gl0_inv 3632; GFX10-CU-NEXT: buffer_gl1_inv 3633; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 3634; GFX10-CU-NEXT: s_endpgm 3635; 3636; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 3637; SKIP-CACHE-INV: ; %bb.0: ; %entry 3638; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3639; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3640; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3641; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3642; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3643; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3644; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3645; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3646; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 3647; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3648; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3649; SKIP-CACHE-INV-NEXT: s_endpgm 3650; 3651; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 3652; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3653; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3654; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3655; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3656; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3657; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3658; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3659; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3660; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3661; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3662; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3663; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3664; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3665; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3666; 3667; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 3668; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3669; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3670; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 3671; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 3672; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3673; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 3674; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3675; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3676; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 3677; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3678; GFX90A-TGSPLIT-NEXT: buffer_invl2 3679; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3680; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 3681; GFX90A-TGSPLIT-NEXT: s_endpgm 3682 i32 addrspace(1)* %out, i32 %in, i32 %old) { 3683entry: 3684 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 3685 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 3686 %val0 = extractvalue { i32, i1 } %val, 0 3687 store i32 %val0, i32 addrspace(1)* %out, align 4 3688 ret void 3689} 3690 3691define amdgpu_kernel void @global_system_one_as_unordered_load( 3692; GFX6-LABEL: global_system_one_as_unordered_load: 3693; GFX6: ; %bb.0: ; %entry 3694; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3695; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3696; GFX6-NEXT: s_mov_b32 s2, -1 
3697; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3698; GFX6-NEXT: s_mov_b32 s0, s4 3699; GFX6-NEXT: s_mov_b32 s1, s5 3700; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 3701; GFX6-NEXT: s_mov_b32 s4, s6 3702; GFX6-NEXT: s_mov_b32 s5, s7 3703; GFX6-NEXT: s_mov_b32 s6, s2 3704; GFX6-NEXT: s_mov_b32 s7, s3 3705; GFX6-NEXT: s_waitcnt vmcnt(0) 3706; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3707; GFX6-NEXT: s_endpgm 3708; 3709; GFX7-LABEL: global_system_one_as_unordered_load: 3710; GFX7: ; %bb.0: ; %entry 3711; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3712; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3713; GFX7-NEXT: v_mov_b32_e32 v0, s0 3714; GFX7-NEXT: v_mov_b32_e32 v1, s1 3715; GFX7-NEXT: flat_load_dword v0, v[0:1] 3716; GFX7-NEXT: v_mov_b32_e32 v2, s2 3717; GFX7-NEXT: v_mov_b32_e32 v3, s3 3718; GFX7-NEXT: s_waitcnt vmcnt(0) 3719; GFX7-NEXT: flat_store_dword v[2:3], v0 3720; GFX7-NEXT: s_endpgm 3721; 3722; GFX10-WGP-LABEL: global_system_one_as_unordered_load: 3723; GFX10-WGP: ; %bb.0: ; %entry 3724; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3725; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3726; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3727; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] 3728; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3729; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3730; GFX10-WGP-NEXT: s_endpgm 3731; 3732; GFX10-CU-LABEL: global_system_one_as_unordered_load: 3733; GFX10-CU: ; %bb.0: ; %entry 3734; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3735; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3736; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3737; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] 3738; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3739; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3740; GFX10-CU-NEXT: s_endpgm 3741; 3742; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_load: 3743; SKIP-CACHE-INV: ; %bb.0: ; %entry 3744; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3745; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3746; SKIP-CACHE-INV-NEXT: s_mov_b32 
s2, -1 3747; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3748; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3749; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3750; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 3751; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3752; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3753; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3754; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3755; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3756; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3757; SKIP-CACHE-INV-NEXT: s_endpgm 3758; 3759; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_unordered_load: 3760; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3761; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3762; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3763; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3764; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] 3765; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3766; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3767; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3768; 3769; GFX90A-TGSPLIT-LABEL: global_system_one_as_unordered_load: 3770; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3771; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3772; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3773; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3774; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] 3775; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3776; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3777; GFX90A-TGSPLIT-NEXT: s_endpgm 3778 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3779entry: 3780 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") unordered, align 4 3781 store i32 %val, i32 addrspace(1)* %out 3782 ret void 3783} 3784 3785define amdgpu_kernel void @global_system_one_as_monotonic_load( 3786; GFX6-LABEL: global_system_one_as_monotonic_load: 3787; GFX6: ; %bb.0: ; %entry 3788; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3789; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3790; GFX6-NEXT: s_mov_b32 s2, 
-1 3791; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3792; GFX6-NEXT: s_mov_b32 s0, s4 3793; GFX6-NEXT: s_mov_b32 s1, s5 3794; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3795; GFX6-NEXT: s_mov_b32 s4, s6 3796; GFX6-NEXT: s_mov_b32 s5, s7 3797; GFX6-NEXT: s_mov_b32 s6, s2 3798; GFX6-NEXT: s_mov_b32 s7, s3 3799; GFX6-NEXT: s_waitcnt vmcnt(0) 3800; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3801; GFX6-NEXT: s_endpgm 3802; 3803; GFX7-LABEL: global_system_one_as_monotonic_load: 3804; GFX7: ; %bb.0: ; %entry 3805; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3806; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3807; GFX7-NEXT: v_mov_b32_e32 v0, s0 3808; GFX7-NEXT: v_mov_b32_e32 v1, s1 3809; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 3810; GFX7-NEXT: v_mov_b32_e32 v2, s2 3811; GFX7-NEXT: v_mov_b32_e32 v3, s3 3812; GFX7-NEXT: s_waitcnt vmcnt(0) 3813; GFX7-NEXT: flat_store_dword v[2:3], v0 3814; GFX7-NEXT: s_endpgm 3815; 3816; GFX10-WGP-LABEL: global_system_one_as_monotonic_load: 3817; GFX10-WGP: ; %bb.0: ; %entry 3818; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3819; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3820; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3821; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3822; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3823; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3824; GFX10-WGP-NEXT: s_endpgm 3825; 3826; GFX10-CU-LABEL: global_system_one_as_monotonic_load: 3827; GFX10-CU: ; %bb.0: ; %entry 3828; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3829; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3830; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3831; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3832; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3833; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3834; GFX10-CU-NEXT: s_endpgm 3835; 3836; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_load: 3837; SKIP-CACHE-INV: ; %bb.0: ; %entry 3838; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3839; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3840; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3841; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3842; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3843; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3844; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3845; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3846; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3847; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3848; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3849; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3850; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3851; SKIP-CACHE-INV-NEXT: s_endpgm 3852; 3853; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_load: 3854; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3855; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3856; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3857; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3858; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3859; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3860; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3861; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3862; 3863; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_load: 3864; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3865; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3866; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3867; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3868; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3869; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3870; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3871; GFX90A-TGSPLIT-NEXT: s_endpgm 3872 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3873entry: 3874 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") monotonic, align 4 3875 store i32 %val, i32 addrspace(1)* %out 3876 ret void 3877} 3878 3879define amdgpu_kernel void @global_system_one_as_acquire_load( 3880; GFX6-LABEL: global_system_one_as_acquire_load: 3881; GFX6: ; %bb.0: ; %entry 3882; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3883; GFX6-NEXT: s_mov_b32 s3, 
0x100f000 3884; GFX6-NEXT: s_mov_b32 s2, -1 3885; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3886; GFX6-NEXT: s_mov_b32 s0, s4 3887; GFX6-NEXT: s_mov_b32 s1, s5 3888; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3889; GFX6-NEXT: s_waitcnt vmcnt(0) 3890; GFX6-NEXT: buffer_wbinvl1 3891; GFX6-NEXT: s_mov_b32 s4, s6 3892; GFX6-NEXT: s_mov_b32 s5, s7 3893; GFX6-NEXT: s_mov_b32 s6, s2 3894; GFX6-NEXT: s_mov_b32 s7, s3 3895; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3896; GFX6-NEXT: s_endpgm 3897; 3898; GFX7-LABEL: global_system_one_as_acquire_load: 3899; GFX7: ; %bb.0: ; %entry 3900; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3901; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3902; GFX7-NEXT: v_mov_b32_e32 v0, s0 3903; GFX7-NEXT: v_mov_b32_e32 v1, s1 3904; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 3905; GFX7-NEXT: s_waitcnt vmcnt(0) 3906; GFX7-NEXT: buffer_wbinvl1_vol 3907; GFX7-NEXT: v_mov_b32_e32 v2, s2 3908; GFX7-NEXT: v_mov_b32_e32 v3, s3 3909; GFX7-NEXT: flat_store_dword v[2:3], v0 3910; GFX7-NEXT: s_endpgm 3911; 3912; GFX10-WGP-LABEL: global_system_one_as_acquire_load: 3913; GFX10-WGP: ; %bb.0: ; %entry 3914; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3915; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3916; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3917; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3918; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3919; GFX10-WGP-NEXT: buffer_gl0_inv 3920; GFX10-WGP-NEXT: buffer_gl1_inv 3921; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3922; GFX10-WGP-NEXT: s_endpgm 3923; 3924; GFX10-CU-LABEL: global_system_one_as_acquire_load: 3925; GFX10-CU: ; %bb.0: ; %entry 3926; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3927; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3928; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3929; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3930; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3931; GFX10-CU-NEXT: buffer_gl0_inv 3932; GFX10-CU-NEXT: buffer_gl1_inv 3933; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3934; 
GFX10-CU-NEXT: s_endpgm 3935; 3936; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_load: 3937; SKIP-CACHE-INV: ; %bb.0: ; %entry 3938; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3939; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3940; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3941; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3942; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3943; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3944; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3945; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3946; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3947; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3948; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3949; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3950; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3951; SKIP-CACHE-INV-NEXT: s_endpgm 3952; 3953; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_load: 3954; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3955; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3956; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3957; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3958; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3959; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3960; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3961; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3962; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3963; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3964; 3965; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_load: 3966; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3967; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3968; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3969; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3970; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 3971; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3972; GFX90A-TGSPLIT-NEXT: buffer_invl2 3973; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3974; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 3975; GFX90A-TGSPLIT-NEXT: s_endpgm 3976 i32 addrspace(1)* %in, i32 
addrspace(1)* %out) { 3977entry: 3978 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") acquire, align 4 3979 store i32 %val, i32 addrspace(1)* %out 3980 ret void 3981} 3982 3983define amdgpu_kernel void @global_system_one_as_seq_cst_load( 3984; GFX6-LABEL: global_system_one_as_seq_cst_load: 3985; GFX6: ; %bb.0: ; %entry 3986; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3987; GFX6-NEXT: s_mov_b32 s3, 0x100f000 3988; GFX6-NEXT: s_mov_b32 s2, -1 3989; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3990; GFX6-NEXT: s_mov_b32 s0, s4 3991; GFX6-NEXT: s_mov_b32 s1, s5 3992; GFX6-NEXT: s_waitcnt vmcnt(0) 3993; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3994; GFX6-NEXT: s_waitcnt vmcnt(0) 3995; GFX6-NEXT: buffer_wbinvl1 3996; GFX6-NEXT: s_mov_b32 s4, s6 3997; GFX6-NEXT: s_mov_b32 s5, s7 3998; GFX6-NEXT: s_mov_b32 s6, s2 3999; GFX6-NEXT: s_mov_b32 s7, s3 4000; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 4001; GFX6-NEXT: s_endpgm 4002; 4003; GFX7-LABEL: global_system_one_as_seq_cst_load: 4004; GFX7: ; %bb.0: ; %entry 4005; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4006; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4007; GFX7-NEXT: v_mov_b32_e32 v0, s0 4008; GFX7-NEXT: v_mov_b32_e32 v1, s1 4009; GFX7-NEXT: s_waitcnt vmcnt(0) 4010; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 4011; GFX7-NEXT: s_waitcnt vmcnt(0) 4012; GFX7-NEXT: buffer_wbinvl1_vol 4013; GFX7-NEXT: v_mov_b32_e32 v2, s2 4014; GFX7-NEXT: v_mov_b32_e32 v3, s3 4015; GFX7-NEXT: flat_store_dword v[2:3], v0 4016; GFX7-NEXT: s_endpgm 4017; 4018; GFX10-WGP-LABEL: global_system_one_as_seq_cst_load: 4019; GFX10-WGP: ; %bb.0: ; %entry 4020; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4021; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4022; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4023; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4024; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 4025; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4026; GFX10-WGP-NEXT: buffer_gl0_inv 4027; GFX10-WGP-NEXT: buffer_gl1_inv 4028; 
GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 4029; GFX10-WGP-NEXT: s_endpgm 4030; 4031; GFX10-CU-LABEL: global_system_one_as_seq_cst_load: 4032; GFX10-CU: ; %bb.0: ; %entry 4033; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4034; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4035; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4036; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4037; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 4038; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4039; GFX10-CU-NEXT: buffer_gl0_inv 4040; GFX10-CU-NEXT: buffer_gl1_inv 4041; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 4042; GFX10-CU-NEXT: s_endpgm 4043; 4044; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_load: 4045; SKIP-CACHE-INV: ; %bb.0: ; %entry 4046; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 4047; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4048; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4049; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4050; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 4051; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 4052; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4053; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 4054; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4055; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 4056; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 4057; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 4058; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 4059; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 4060; SKIP-CACHE-INV-NEXT: s_endpgm 4061; 4062; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_load: 4063; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4064; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4065; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4066; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4067; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 4068; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4069; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4070; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4071; GFX90A-NOTTGSPLIT-NEXT: 
global_store_dword v0, v1, s[2:3] 4072; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4073; 4074; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_load: 4075; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4076; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4077; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4078; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4079; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc 4080; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4081; GFX90A-TGSPLIT-NEXT: buffer_invl2 4082; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4083; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 4084; GFX90A-TGSPLIT-NEXT: s_endpgm 4085 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 4086entry: 4087 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") seq_cst, align 4 4088 store i32 %val, i32 addrspace(1)* %out 4089 ret void 4090} 4091 4092define amdgpu_kernel void @global_system_one_as_unordered_store( 4093; GFX6-LABEL: global_system_one_as_unordered_store: 4094; GFX6: ; %bb.0: ; %entry 4095; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 4096; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4097; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4098; GFX6-NEXT: s_mov_b32 s2, -1 4099; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4100; GFX6-NEXT: v_mov_b32_e32 v0, s6 4101; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4102; GFX6-NEXT: s_endpgm 4103; 4104; GFX7-LABEL: global_system_one_as_unordered_store: 4105; GFX7: ; %bb.0: ; %entry 4106; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4107; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4108; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4109; GFX7-NEXT: v_mov_b32_e32 v2, s2 4110; GFX7-NEXT: v_mov_b32_e32 v0, s0 4111; GFX7-NEXT: v_mov_b32_e32 v1, s1 4112; GFX7-NEXT: flat_store_dword v[0:1], v2 4113; GFX7-NEXT: s_endpgm 4114; 4115; GFX10-WGP-LABEL: global_system_one_as_unordered_store: 4116; GFX10-WGP: ; %bb.0: ; %entry 4117; GFX10-WGP-NEXT: s_clause 0x1 4118; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 4119; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x8 4120; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4121; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4122; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4123; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4124; GFX10-WGP-NEXT: s_endpgm 4125; 4126; GFX10-CU-LABEL: global_system_one_as_unordered_store: 4127; GFX10-CU: ; %bb.0: ; %entry 4128; GFX10-CU-NEXT: s_clause 0x1 4129; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4130; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4131; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4132; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4133; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4134; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4135; GFX10-CU-NEXT: s_endpgm 4136; 4137; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_store: 4138; SKIP-CACHE-INV: ; %bb.0: ; %entry 4139; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 4140; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4141; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4142; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4143; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4144; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4145; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4146; SKIP-CACHE-INV-NEXT: s_endpgm 4147; 4148; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_unordered_store: 4149; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4150; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4151; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4152; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4153; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4154; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4155; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4156; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4157; 4158; GFX90A-TGSPLIT-LABEL: global_system_one_as_unordered_store: 4159; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4160; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4161; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4162; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4163; GFX90A-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 4164; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4165; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4166; GFX90A-TGSPLIT-NEXT: s_endpgm 4167 i32 %in, i32 addrspace(1)* %out) { 4168entry: 4169 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") unordered, align 4 4170 ret void 4171} 4172 4173define amdgpu_kernel void @global_system_one_as_monotonic_store( 4174; GFX6-LABEL: global_system_one_as_monotonic_store: 4175; GFX6: ; %bb.0: ; %entry 4176; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 4177; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4178; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4179; GFX6-NEXT: s_mov_b32 s2, -1 4180; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4181; GFX6-NEXT: v_mov_b32_e32 v0, s6 4182; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4183; GFX6-NEXT: s_endpgm 4184; 4185; GFX7-LABEL: global_system_one_as_monotonic_store: 4186; GFX7: ; %bb.0: ; %entry 4187; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4188; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4189; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4190; GFX7-NEXT: v_mov_b32_e32 v2, s2 4191; GFX7-NEXT: v_mov_b32_e32 v0, s0 4192; GFX7-NEXT: v_mov_b32_e32 v1, s1 4193; GFX7-NEXT: flat_store_dword v[0:1], v2 4194; GFX7-NEXT: s_endpgm 4195; 4196; GFX10-WGP-LABEL: global_system_one_as_monotonic_store: 4197; GFX10-WGP: ; %bb.0: ; %entry 4198; GFX10-WGP-NEXT: s_clause 0x1 4199; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 4200; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4201; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4202; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4203; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4204; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4205; GFX10-WGP-NEXT: s_endpgm 4206; 4207; GFX10-CU-LABEL: global_system_one_as_monotonic_store: 4208; GFX10-CU: ; %bb.0: ; %entry 4209; GFX10-CU-NEXT: s_clause 0x1 4210; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4211; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4212; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4213; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) 4214; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4215; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4216; GFX10-CU-NEXT: s_endpgm 4217; 4218; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_store: 4219; SKIP-CACHE-INV: ; %bb.0: ; %entry 4220; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 4221; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4222; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4223; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4224; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4225; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4226; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4227; SKIP-CACHE-INV-NEXT: s_endpgm 4228; 4229; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_store: 4230; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4231; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4232; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4233; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4234; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4235; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4236; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4237; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4238; 4239; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_store: 4240; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4241; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4242; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4243; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4244; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4245; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4246; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4247; GFX90A-TGSPLIT-NEXT: s_endpgm 4248 i32 %in, i32 addrspace(1)* %out) { 4249entry: 4250 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") monotonic, align 4 4251 ret void 4252} 4253 4254define amdgpu_kernel void @global_system_one_as_release_store( 4255; GFX6-LABEL: global_system_one_as_release_store: 4256; GFX6: ; %bb.0: ; %entry 4257; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 
4258; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4259; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4260; GFX6-NEXT: s_mov_b32 s2, -1 4261; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4262; GFX6-NEXT: v_mov_b32_e32 v0, s6 4263; GFX6-NEXT: s_waitcnt vmcnt(0) 4264; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4265; GFX6-NEXT: s_endpgm 4266; 4267; GFX7-LABEL: global_system_one_as_release_store: 4268; GFX7: ; %bb.0: ; %entry 4269; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4270; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4271; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4272; GFX7-NEXT: v_mov_b32_e32 v2, s2 4273; GFX7-NEXT: v_mov_b32_e32 v0, s0 4274; GFX7-NEXT: v_mov_b32_e32 v1, s1 4275; GFX7-NEXT: s_waitcnt vmcnt(0) 4276; GFX7-NEXT: flat_store_dword v[0:1], v2 4277; GFX7-NEXT: s_endpgm 4278; 4279; GFX10-WGP-LABEL: global_system_one_as_release_store: 4280; GFX10-WGP: ; %bb.0: ; %entry 4281; GFX10-WGP-NEXT: s_clause 0x1 4282; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 4283; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4284; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4285; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4286; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4287; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4288; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4289; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4290; GFX10-WGP-NEXT: s_endpgm 4291; 4292; GFX10-CU-LABEL: global_system_one_as_release_store: 4293; GFX10-CU: ; %bb.0: ; %entry 4294; GFX10-CU-NEXT: s_clause 0x1 4295; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4296; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4297; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4298; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4299; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4300; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4301; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4302; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4303; GFX10-CU-NEXT: s_endpgm 4304; 4305; SKIP-CACHE-INV-LABEL: global_system_one_as_release_store: 4306; SKIP-CACHE-INV: ; %bb.0: ; %entry 4307; SKIP-CACHE-INV-NEXT: s_load_dword 
s4, s[0:1], 0x9 4308; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4309; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4310; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4311; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4312; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4313; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4314; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4315; SKIP-CACHE-INV-NEXT: s_endpgm 4316; 4317; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_store: 4318; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4319; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4320; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4321; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4322; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4323; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4324; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 4325; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4326; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4327; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4328; 4329; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_store: 4330; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4331; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4332; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4333; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4334; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4335; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4336; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4337; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4338; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4339; GFX90A-TGSPLIT-NEXT: s_endpgm 4340 i32 %in, i32 addrspace(1)* %out) { 4341entry: 4342 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") release, align 4 4343 ret void 4344} 4345 4346define amdgpu_kernel void @global_system_one_as_seq_cst_store( 4347; GFX6-LABEL: global_system_one_as_seq_cst_store: 4348; GFX6: ; %bb.0: ; %entry 4349; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 4350; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4351; GFX6-NEXT: s_mov_b32 s3, 
0x100f000 4352; GFX6-NEXT: s_mov_b32 s2, -1 4353; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4354; GFX6-NEXT: v_mov_b32_e32 v0, s6 4355; GFX6-NEXT: s_waitcnt vmcnt(0) 4356; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4357; GFX6-NEXT: s_endpgm 4358; 4359; GFX7-LABEL: global_system_one_as_seq_cst_store: 4360; GFX7: ; %bb.0: ; %entry 4361; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 4362; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 4363; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4364; GFX7-NEXT: v_mov_b32_e32 v2, s2 4365; GFX7-NEXT: v_mov_b32_e32 v0, s0 4366; GFX7-NEXT: v_mov_b32_e32 v1, s1 4367; GFX7-NEXT: s_waitcnt vmcnt(0) 4368; GFX7-NEXT: flat_store_dword v[0:1], v2 4369; GFX7-NEXT: s_endpgm 4370; 4371; GFX10-WGP-LABEL: global_system_one_as_seq_cst_store: 4372; GFX10-WGP: ; %bb.0: ; %entry 4373; GFX10-WGP-NEXT: s_clause 0x1 4374; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 4375; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4376; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4377; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4378; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4379; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4380; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4381; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4382; GFX10-WGP-NEXT: s_endpgm 4383; 4384; GFX10-CU-LABEL: global_system_one_as_seq_cst_store: 4385; GFX10-CU: ; %bb.0: ; %entry 4386; GFX10-CU-NEXT: s_clause 0x1 4387; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 4388; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4389; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4390; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4391; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4392; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4393; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4394; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4395; GFX10-CU-NEXT: s_endpgm 4396; 4397; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_store: 4398; SKIP-CACHE-INV: ; %bb.0: ; %entry 4399; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 4400; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4401; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 4402; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 4403; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4404; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 4405; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4406; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 4407; SKIP-CACHE-INV-NEXT: s_endpgm 4408; 4409; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_store: 4410; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4411; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4412; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4413; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4414; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4415; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4416; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 4417; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4418; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4419; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4420; 4421; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_store: 4422; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4423; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 4424; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4425; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4426; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4427; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4428; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4429; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4430; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 4431; GFX90A-TGSPLIT-NEXT: s_endpgm 4432 i32 %in, i32 addrspace(1)* %out) { 4433entry: 4434 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") seq_cst, align 4 4435 ret void 4436} 4437 4438define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( 4439; GFX6-LABEL: global_system_one_as_monotonic_atomicrmw: 4440; GFX6: ; %bb.0: ; %entry 4441; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4442; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4443; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4444; GFX6-NEXT: s_mov_b32 s2, -1 4445; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) 4446; GFX6-NEXT: v_mov_b32_e32 v0, s4 4447; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4448; GFX6-NEXT: s_endpgm 4449; 4450; GFX7-LABEL: global_system_one_as_monotonic_atomicrmw: 4451; GFX7: ; %bb.0: ; %entry 4452; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4453; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4454; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4455; GFX7-NEXT: v_mov_b32_e32 v0, s0 4456; GFX7-NEXT: v_mov_b32_e32 v1, s1 4457; GFX7-NEXT: v_mov_b32_e32 v2, s2 4458; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4459; GFX7-NEXT: s_endpgm 4460; 4461; GFX10-WGP-LABEL: global_system_one_as_monotonic_atomicrmw: 4462; GFX10-WGP: ; %bb.0: ; %entry 4463; GFX10-WGP-NEXT: s_clause 0x1 4464; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4465; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4466; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4467; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4468; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4469; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4470; GFX10-WGP-NEXT: s_endpgm 4471; 4472; GFX10-CU-LABEL: global_system_one_as_monotonic_atomicrmw: 4473; GFX10-CU: ; %bb.0: ; %entry 4474; GFX10-CU-NEXT: s_clause 0x1 4475; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4476; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4477; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4478; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4479; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4480; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4481; GFX10-CU-NEXT: s_endpgm 4482; 4483; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_atomicrmw: 4484; SKIP-CACHE-INV: ; %bb.0: ; %entry 4485; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4486; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4487; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4488; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4489; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4490; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4491; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4492; SKIP-CACHE-INV-NEXT: s_endpgm 4493; 4494; 
GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: 4495; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4496; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4497; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4498; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4499; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4500; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4501; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4502; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4503; 4504; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: 4505; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4506; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4507; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4508; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4509; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4510; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4511; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4512; GFX90A-TGSPLIT-NEXT: s_endpgm 4513 i32 addrspace(1)* %out, i32 %in) { 4514entry: 4515 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") monotonic 4516 ret void 4517} 4518 4519define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( 4520; GFX6-LABEL: global_system_one_as_acquire_atomicrmw: 4521; GFX6: ; %bb.0: ; %entry 4522; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4523; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4524; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4525; GFX6-NEXT: s_mov_b32 s2, -1 4526; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4527; GFX6-NEXT: v_mov_b32_e32 v0, s4 4528; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4529; GFX6-NEXT: s_waitcnt vmcnt(0) 4530; GFX6-NEXT: buffer_wbinvl1 4531; GFX6-NEXT: s_endpgm 4532; 4533; GFX7-LABEL: global_system_one_as_acquire_atomicrmw: 4534; GFX7: ; %bb.0: ; %entry 4535; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4536; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4537; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4538; GFX7-NEXT: v_mov_b32_e32 v0, s0 4539; GFX7-NEXT: v_mov_b32_e32 v1, s1 
4540; GFX7-NEXT: v_mov_b32_e32 v2, s2 4541; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4542; GFX7-NEXT: s_waitcnt vmcnt(0) 4543; GFX7-NEXT: buffer_wbinvl1_vol 4544; GFX7-NEXT: s_endpgm 4545; 4546; GFX10-WGP-LABEL: global_system_one_as_acquire_atomicrmw: 4547; GFX10-WGP: ; %bb.0: ; %entry 4548; GFX10-WGP-NEXT: s_clause 0x1 4549; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4550; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4551; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4552; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4553; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4554; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4555; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4556; GFX10-WGP-NEXT: buffer_gl0_inv 4557; GFX10-WGP-NEXT: buffer_gl1_inv 4558; GFX10-WGP-NEXT: s_endpgm 4559; 4560; GFX10-CU-LABEL: global_system_one_as_acquire_atomicrmw: 4561; GFX10-CU: ; %bb.0: ; %entry 4562; GFX10-CU-NEXT: s_clause 0x1 4563; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4564; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4565; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4566; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4567; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4568; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4569; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4570; GFX10-CU-NEXT: buffer_gl0_inv 4571; GFX10-CU-NEXT: buffer_gl1_inv 4572; GFX10-CU-NEXT: s_endpgm 4573; 4574; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_atomicrmw: 4575; SKIP-CACHE-INV: ; %bb.0: ; %entry 4576; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4577; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4578; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4579; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4580; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4581; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4582; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4583; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4584; SKIP-CACHE-INV-NEXT: s_endpgm 4585; 4586; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: 4587; 
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4588; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4589; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4590; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4591; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4592; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4593; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4594; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4595; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4596; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4597; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4598; 4599; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: 4600; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4601; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4602; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4603; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4604; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4605; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4606; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4607; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4608; GFX90A-TGSPLIT-NEXT: buffer_invl2 4609; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4610; GFX90A-TGSPLIT-NEXT: s_endpgm 4611 i32 addrspace(1)* %out, i32 %in) { 4612entry: 4613 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acquire 4614 ret void 4615} 4616 4617define amdgpu_kernel void @global_system_one_as_release_atomicrmw( 4618; GFX6-LABEL: global_system_one_as_release_atomicrmw: 4619; GFX6: ; %bb.0: ; %entry 4620; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4621; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4622; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4623; GFX6-NEXT: s_mov_b32 s2, -1 4624; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4625; GFX6-NEXT: v_mov_b32_e32 v0, s4 4626; GFX6-NEXT: s_waitcnt vmcnt(0) 4627; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4628; GFX6-NEXT: s_endpgm 4629; 4630; GFX7-LABEL: global_system_one_as_release_atomicrmw: 4631; GFX7: ; %bb.0: ; %entry 4632; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x0 4633; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4634; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4635; GFX7-NEXT: v_mov_b32_e32 v0, s0 4636; GFX7-NEXT: v_mov_b32_e32 v1, s1 4637; GFX7-NEXT: v_mov_b32_e32 v2, s2 4638; GFX7-NEXT: s_waitcnt vmcnt(0) 4639; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4640; GFX7-NEXT: s_endpgm 4641; 4642; GFX10-WGP-LABEL: global_system_one_as_release_atomicrmw: 4643; GFX10-WGP: ; %bb.0: ; %entry 4644; GFX10-WGP-NEXT: s_clause 0x1 4645; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4646; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4647; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4648; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4649; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4650; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4651; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4652; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4653; GFX10-WGP-NEXT: s_endpgm 4654; 4655; GFX10-CU-LABEL: global_system_one_as_release_atomicrmw: 4656; GFX10-CU: ; %bb.0: ; %entry 4657; GFX10-CU-NEXT: s_clause 0x1 4658; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4659; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4660; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4661; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4662; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4663; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4664; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4665; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4666; GFX10-CU-NEXT: s_endpgm 4667; 4668; SKIP-CACHE-INV-LABEL: global_system_one_as_release_atomicrmw: 4669; SKIP-CACHE-INV: ; %bb.0: ; %entry 4670; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4671; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4672; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4673; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4674; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4675; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4676; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4677; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4678; SKIP-CACHE-INV-NEXT: s_endpgm 4679; 4680; 
GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_atomicrmw: 4681; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4682; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4683; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4684; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4685; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4686; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4687; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 4688; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4689; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4690; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4691; 4692; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_atomicrmw: 4693; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4694; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4695; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4696; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4697; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4698; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4699; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4700; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4701; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4702; GFX90A-TGSPLIT-NEXT: s_endpgm 4703 i32 addrspace(1)* %out, i32 %in) { 4704entry: 4705 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") release 4706 ret void 4707} 4708 4709define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( 4710; GFX6-LABEL: global_system_one_as_acq_rel_atomicrmw: 4711; GFX6: ; %bb.0: ; %entry 4712; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4713; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4714; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4715; GFX6-NEXT: s_mov_b32 s2, -1 4716; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4717; GFX6-NEXT: v_mov_b32_e32 v0, s4 4718; GFX6-NEXT: s_waitcnt vmcnt(0) 4719; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4720; GFX6-NEXT: s_waitcnt vmcnt(0) 4721; GFX6-NEXT: buffer_wbinvl1 4722; GFX6-NEXT: s_endpgm 4723; 4724; GFX7-LABEL: global_system_one_as_acq_rel_atomicrmw: 4725; GFX7: ; %bb.0: ; %entry 4726; 
GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4727; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4728; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4729; GFX7-NEXT: v_mov_b32_e32 v0, s0 4730; GFX7-NEXT: v_mov_b32_e32 v1, s1 4731; GFX7-NEXT: v_mov_b32_e32 v2, s2 4732; GFX7-NEXT: s_waitcnt vmcnt(0) 4733; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4734; GFX7-NEXT: s_waitcnt vmcnt(0) 4735; GFX7-NEXT: buffer_wbinvl1_vol 4736; GFX7-NEXT: s_endpgm 4737; 4738; GFX10-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw: 4739; GFX10-WGP: ; %bb.0: ; %entry 4740; GFX10-WGP-NEXT: s_clause 0x1 4741; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4742; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4743; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4744; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4745; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4746; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4747; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4748; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4749; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4750; GFX10-WGP-NEXT: buffer_gl0_inv 4751; GFX10-WGP-NEXT: buffer_gl1_inv 4752; GFX10-WGP-NEXT: s_endpgm 4753; 4754; GFX10-CU-LABEL: global_system_one_as_acq_rel_atomicrmw: 4755; GFX10-CU: ; %bb.0: ; %entry 4756; GFX10-CU-NEXT: s_clause 0x1 4757; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4758; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4759; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4760; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4761; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4762; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4763; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4764; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4765; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4766; GFX10-CU-NEXT: buffer_gl0_inv 4767; GFX10-CU-NEXT: buffer_gl1_inv 4768; GFX10-CU-NEXT: s_endpgm 4769; 4770; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw: 4771; SKIP-CACHE-INV: ; %bb.0: ; %entry 4772; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4773; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4774; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4775; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4776; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4777; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4778; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4779; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4780; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4781; SKIP-CACHE-INV-NEXT: s_endpgm 4782; 4783; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: 4784; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4785; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4786; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4787; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4788; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4789; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4790; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 4791; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4792; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4793; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4794; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4795; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4796; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4797; 4798; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: 4799; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4800; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4801; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4802; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4803; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4804; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4805; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4806; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4807; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4808; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4809; GFX90A-TGSPLIT-NEXT: buffer_invl2 4810; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4811; GFX90A-TGSPLIT-NEXT: s_endpgm 4812 i32 addrspace(1)* %out, i32 %in) { 4813entry: 4814 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acq_rel 4815 ret void 4816} 4817 4818define amdgpu_kernel void 
@global_system_one_as_seq_cst_atomicrmw( 4819; GFX6-LABEL: global_system_one_as_seq_cst_atomicrmw: 4820; GFX6: ; %bb.0: ; %entry 4821; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4822; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4823; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4824; GFX6-NEXT: s_mov_b32 s2, -1 4825; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4826; GFX6-NEXT: v_mov_b32_e32 v0, s4 4827; GFX6-NEXT: s_waitcnt vmcnt(0) 4828; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 4829; GFX6-NEXT: s_waitcnt vmcnt(0) 4830; GFX6-NEXT: buffer_wbinvl1 4831; GFX6-NEXT: s_endpgm 4832; 4833; GFX7-LABEL: global_system_one_as_seq_cst_atomicrmw: 4834; GFX7: ; %bb.0: ; %entry 4835; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4836; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4837; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4838; GFX7-NEXT: v_mov_b32_e32 v0, s0 4839; GFX7-NEXT: v_mov_b32_e32 v1, s1 4840; GFX7-NEXT: v_mov_b32_e32 v2, s2 4841; GFX7-NEXT: s_waitcnt vmcnt(0) 4842; GFX7-NEXT: flat_atomic_swap v[0:1], v2 4843; GFX7-NEXT: s_waitcnt vmcnt(0) 4844; GFX7-NEXT: buffer_wbinvl1_vol 4845; GFX7-NEXT: s_endpgm 4846; 4847; GFX10-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw: 4848; GFX10-WGP: ; %bb.0: ; %entry 4849; GFX10-WGP-NEXT: s_clause 0x1 4850; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4851; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4852; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4853; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4854; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4855; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4856; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4857; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 4858; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4859; GFX10-WGP-NEXT: buffer_gl0_inv 4860; GFX10-WGP-NEXT: buffer_gl1_inv 4861; GFX10-WGP-NEXT: s_endpgm 4862; 4863; GFX10-CU-LABEL: global_system_one_as_seq_cst_atomicrmw: 4864; GFX10-CU: ; %bb.0: ; %entry 4865; GFX10-CU-NEXT: s_clause 0x1 4866; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4867; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x0 4868; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4869; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4870; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4871; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4872; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4873; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 4874; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4875; GFX10-CU-NEXT: buffer_gl0_inv 4876; GFX10-CU-NEXT: buffer_gl1_inv 4877; GFX10-CU-NEXT: s_endpgm 4878; 4879; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw: 4880; SKIP-CACHE-INV: ; %bb.0: ; %entry 4881; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4882; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4883; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4884; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4885; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4886; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4887; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4888; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 4889; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4890; SKIP-CACHE-INV-NEXT: s_endpgm 4891; 4892; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: 4893; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4894; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4895; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 4896; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4897; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4898; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4899; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 4900; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4901; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4902; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4903; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4904; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4905; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4906; 4907; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: 4908; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4909; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4910; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 
4911; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4912; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4913; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4914; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4915; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4916; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 4917; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4918; GFX90A-TGSPLIT-NEXT: buffer_invl2 4919; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4920; GFX90A-TGSPLIT-NEXT: s_endpgm 4921 i32 addrspace(1)* %out, i32 %in) { 4922entry: 4923 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") seq_cst 4924 ret void 4925} 4926 4927define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( 4928; GFX6-LABEL: global_system_one_as_acquire_ret_atomicrmw: 4929; GFX6: ; %bb.0: ; %entry 4930; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4931; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 4932; GFX6-NEXT: s_mov_b32 s3, 0x100f000 4933; GFX6-NEXT: s_mov_b32 s2, -1 4934; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4935; GFX6-NEXT: v_mov_b32_e32 v0, s4 4936; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 4937; GFX6-NEXT: s_waitcnt vmcnt(0) 4938; GFX6-NEXT: buffer_wbinvl1 4939; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 4940; GFX6-NEXT: s_endpgm 4941; 4942; GFX7-LABEL: global_system_one_as_acquire_ret_atomicrmw: 4943; GFX7: ; %bb.0: ; %entry 4944; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4945; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 4946; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4947; GFX7-NEXT: v_mov_b32_e32 v0, s0 4948; GFX7-NEXT: v_mov_b32_e32 v1, s1 4949; GFX7-NEXT: v_mov_b32_e32 v2, s2 4950; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 4951; GFX7-NEXT: s_waitcnt vmcnt(0) 4952; GFX7-NEXT: buffer_wbinvl1_vol 4953; GFX7-NEXT: flat_store_dword v[0:1], v2 4954; GFX7-NEXT: s_endpgm 4955; 4956; GFX10-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw: 4957; GFX10-WGP: ; %bb.0: ; %entry 4958; GFX10-WGP-NEXT: s_clause 0x1 4959; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 4960; 
GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4961; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4962; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4963; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4964; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4965; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4966; GFX10-WGP-NEXT: buffer_gl0_inv 4967; GFX10-WGP-NEXT: buffer_gl1_inv 4968; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 4969; GFX10-WGP-NEXT: s_endpgm 4970; 4971; GFX10-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw: 4972; GFX10-CU: ; %bb.0: ; %entry 4973; GFX10-CU-NEXT: s_clause 0x1 4974; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 4975; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4976; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4977; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4978; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4979; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 4980; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4981; GFX10-CU-NEXT: buffer_gl0_inv 4982; GFX10-CU-NEXT: buffer_gl1_inv 4983; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 4984; GFX10-CU-NEXT: s_endpgm 4985; 4986; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_ret_atomicrmw: 4987; SKIP-CACHE-INV: ; %bb.0: ; %entry 4988; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4989; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4990; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4991; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4992; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4993; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4994; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 4995; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4996; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 4997; SKIP-CACHE-INV-NEXT: s_endpgm 4998; 4999; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: 5000; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5001; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5002; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5003; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 5004; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5005; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5006; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5007; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5008; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5009; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5010; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5011; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5012; 5013; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: 5014; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5015; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5016; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5017; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5018; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5019; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5020; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5021; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5022; GFX90A-TGSPLIT-NEXT: buffer_invl2 5023; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5024; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5025; GFX90A-TGSPLIT-NEXT: s_endpgm 5026 i32 addrspace(1)* %out, i32 %in) { 5027entry: 5028 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acquire 5029 store i32 %val, i32 addrspace(1)* %out, align 4 5030 ret void 5031} 5032 5033define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( 5034; GFX6-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 5035; GFX6: ; %bb.0: ; %entry 5036; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5037; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 5038; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5039; GFX6-NEXT: s_mov_b32 s2, -1 5040; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5041; GFX6-NEXT: v_mov_b32_e32 v0, s4 5042; GFX6-NEXT: s_waitcnt vmcnt(0) 5043; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 5044; GFX6-NEXT: s_waitcnt vmcnt(0) 5045; GFX6-NEXT: buffer_wbinvl1 5046; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 5047; GFX6-NEXT: s_endpgm 
5048; 5049; GFX7-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 5050; GFX7: ; %bb.0: ; %entry 5051; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5052; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 5053; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5054; GFX7-NEXT: v_mov_b32_e32 v0, s0 5055; GFX7-NEXT: v_mov_b32_e32 v1, s1 5056; GFX7-NEXT: v_mov_b32_e32 v2, s2 5057; GFX7-NEXT: s_waitcnt vmcnt(0) 5058; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 5059; GFX7-NEXT: s_waitcnt vmcnt(0) 5060; GFX7-NEXT: buffer_wbinvl1_vol 5061; GFX7-NEXT: flat_store_dword v[0:1], v2 5062; GFX7-NEXT: s_endpgm 5063; 5064; GFX10-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 5065; GFX10-WGP: ; %bb.0: ; %entry 5066; GFX10-WGP-NEXT: s_clause 0x1 5067; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 5068; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5069; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5070; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5071; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 5072; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5073; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5074; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5075; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5076; GFX10-WGP-NEXT: buffer_gl0_inv 5077; GFX10-WGP-NEXT: buffer_gl1_inv 5078; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 5079; GFX10-WGP-NEXT: s_endpgm 5080; 5081; GFX10-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 5082; GFX10-CU: ; %bb.0: ; %entry 5083; GFX10-CU-NEXT: s_clause 0x1 5084; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 5085; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5086; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5087; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5088; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 5089; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5090; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5091; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5092; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5093; GFX10-CU-NEXT: buffer_gl0_inv 5094; GFX10-CU-NEXT: buffer_gl1_inv 5095; GFX10-CU-NEXT: global_store_dword v0, v1, 
s[0:1] 5096; GFX10-CU-NEXT: s_endpgm 5097; 5098; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 5099; SKIP-CACHE-INV: ; %bb.0: ; %entry 5100; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5101; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 5102; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5103; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5104; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5105; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5106; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5107; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 5108; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5109; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5110; SKIP-CACHE-INV-NEXT: s_endpgm 5111; 5112; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 5113; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5114; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5115; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5116; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5117; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5118; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5119; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5120; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5121; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5122; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5123; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5124; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5125; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5126; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5127; 5128; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 5129; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5130; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5131; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5132; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5133; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5134; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5135; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5136; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5137; 
GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5138; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5139; GFX90A-TGSPLIT-NEXT: buffer_invl2 5140; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5141; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5142; GFX90A-TGSPLIT-NEXT: s_endpgm 5143 i32 addrspace(1)* %out, i32 %in) { 5144entry: 5145 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acq_rel 5146 store i32 %val, i32 addrspace(1)* %out, align 4 5147 ret void 5148} 5149 5150define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( 5151; GFX6-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 5152; GFX6: ; %bb.0: ; %entry 5153; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5154; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 5155; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5156; GFX6-NEXT: s_mov_b32 s2, -1 5157; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5158; GFX6-NEXT: v_mov_b32_e32 v0, s4 5159; GFX6-NEXT: s_waitcnt vmcnt(0) 5160; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 5161; GFX6-NEXT: s_waitcnt vmcnt(0) 5162; GFX6-NEXT: buffer_wbinvl1 5163; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 5164; GFX6-NEXT: s_endpgm 5165; 5166; GFX7-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 5167; GFX7: ; %bb.0: ; %entry 5168; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5169; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 5170; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5171; GFX7-NEXT: v_mov_b32_e32 v0, s0 5172; GFX7-NEXT: v_mov_b32_e32 v1, s1 5173; GFX7-NEXT: v_mov_b32_e32 v2, s2 5174; GFX7-NEXT: s_waitcnt vmcnt(0) 5175; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 5176; GFX7-NEXT: s_waitcnt vmcnt(0) 5177; GFX7-NEXT: buffer_wbinvl1_vol 5178; GFX7-NEXT: flat_store_dword v[0:1], v2 5179; GFX7-NEXT: s_endpgm 5180; 5181; GFX10-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 5182; GFX10-WGP: ; %bb.0: ; %entry 5183; GFX10-WGP-NEXT: s_clause 0x1 5184; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 5185; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 5186; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5187; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5188; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 5189; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5190; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5191; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5192; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5193; GFX10-WGP-NEXT: buffer_gl0_inv 5194; GFX10-WGP-NEXT: buffer_gl1_inv 5195; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 5196; GFX10-WGP-NEXT: s_endpgm 5197; 5198; GFX10-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 5199; GFX10-CU: ; %bb.0: ; %entry 5200; GFX10-CU-NEXT: s_clause 0x1 5201; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 5202; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5203; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5204; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5205; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 5206; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5207; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5208; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5209; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5210; GFX10-CU-NEXT: buffer_gl0_inv 5211; GFX10-CU-NEXT: buffer_gl1_inv 5212; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 5213; GFX10-CU-NEXT: s_endpgm 5214; 5215; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 5216; SKIP-CACHE-INV: ; %bb.0: ; %entry 5217; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5218; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 5219; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5220; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5221; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5222; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5223; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5224; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 5225; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5226; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5227; SKIP-CACHE-INV-NEXT: s_endpgm 5228; 5229; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 5230; 
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5231; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5232; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5233; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5234; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5235; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5236; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5237; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5238; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5239; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5240; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5241; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5242; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5243; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5244; 5245; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 5246; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5247; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5248; GFX90A-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 5249; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5250; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5251; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5252; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5253; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5254; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 5255; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5256; GFX90A-TGSPLIT-NEXT: buffer_invl2 5257; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5258; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 5259; GFX90A-TGSPLIT-NEXT: s_endpgm 5260 i32 addrspace(1)* %out, i32 %in) { 5261entry: 5262 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") seq_cst 5263 store i32 %val, i32 addrspace(1)* %out, align 4 5264 ret void 5265} 5266 5267define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( 5268; GFX6-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 5269; GFX6: ; %bb.0: ; %entry 5270; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5271; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5272; 
GFX6-NEXT: s_mov_b32 s3, 0x100f000 5273; GFX6-NEXT: s_mov_b32 s2, -1 5274; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5275; GFX6-NEXT: v_mov_b32_e32 v0, s4 5276; GFX6-NEXT: v_mov_b32_e32 v1, s5 5277; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5278; GFX6-NEXT: s_endpgm 5279; 5280; GFX7-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 5281; GFX7: ; %bb.0: ; %entry 5282; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5283; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5284; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5285; GFX7-NEXT: s_add_u32 s0, s0, 16 5286; GFX7-NEXT: s_addc_u32 s1, s1, 0 5287; GFX7-NEXT: v_mov_b32_e32 v0, s0 5288; GFX7-NEXT: v_mov_b32_e32 v2, s2 5289; GFX7-NEXT: v_mov_b32_e32 v1, s1 5290; GFX7-NEXT: v_mov_b32_e32 v3, s3 5291; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5292; GFX7-NEXT: s_endpgm 5293; 5294; GFX10-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 5295; GFX10-WGP: ; %bb.0: ; %entry 5296; GFX10-WGP-NEXT: s_clause 0x1 5297; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5298; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5299; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5300; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5301; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5302; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5303; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5304; GFX10-WGP-NEXT: s_endpgm 5305; 5306; GFX10-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 5307; GFX10-CU: ; %bb.0: ; %entry 5308; GFX10-CU-NEXT: s_clause 0x1 5309; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5310; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5311; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5312; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5313; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5314; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5315; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5316; GFX10-CU-NEXT: s_endpgm 5317; 5318; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 5319; 
SKIP-CACHE-INV: ; %bb.0: ; %entry 5320; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5321; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5322; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5323; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5324; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5325; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5326; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5327; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5328; SKIP-CACHE-INV-NEXT: s_endpgm 5329; 5330; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 5331; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5332; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5333; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5334; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5335; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5336; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5337; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5338; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5339; 5340; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 5341; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5342; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5343; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5344; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5345; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5346; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5347; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5348; GFX90A-TGSPLIT-NEXT: s_endpgm 5349 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5350entry: 5351 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5352 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic 5353 ret void 5354} 5355 5356define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( 5357; GFX6-LABEL: 
global_system_one_as_acquire_monotonic_cmpxchg: 5358; GFX6: ; %bb.0: ; %entry 5359; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5360; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5361; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5362; GFX6-NEXT: s_mov_b32 s2, -1 5363; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5364; GFX6-NEXT: v_mov_b32_e32 v0, s4 5365; GFX6-NEXT: v_mov_b32_e32 v1, s5 5366; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5367; GFX6-NEXT: s_waitcnt vmcnt(0) 5368; GFX6-NEXT: buffer_wbinvl1 5369; GFX6-NEXT: s_endpgm 5370; 5371; GFX7-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 5372; GFX7: ; %bb.0: ; %entry 5373; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5374; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5375; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5376; GFX7-NEXT: s_add_u32 s0, s0, 16 5377; GFX7-NEXT: s_addc_u32 s1, s1, 0 5378; GFX7-NEXT: v_mov_b32_e32 v0, s0 5379; GFX7-NEXT: v_mov_b32_e32 v2, s2 5380; GFX7-NEXT: v_mov_b32_e32 v1, s1 5381; GFX7-NEXT: v_mov_b32_e32 v3, s3 5382; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5383; GFX7-NEXT: s_waitcnt vmcnt(0) 5384; GFX7-NEXT: buffer_wbinvl1_vol 5385; GFX7-NEXT: s_endpgm 5386; 5387; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 5388; GFX10-WGP: ; %bb.0: ; %entry 5389; GFX10-WGP-NEXT: s_clause 0x1 5390; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5391; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5392; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5393; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5394; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5395; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5396; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5397; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5398; GFX10-WGP-NEXT: buffer_gl0_inv 5399; GFX10-WGP-NEXT: buffer_gl1_inv 5400; GFX10-WGP-NEXT: s_endpgm 5401; 5402; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 5403; GFX10-CU: ; %bb.0: ; %entry 5404; GFX10-CU-NEXT: s_clause 0x1 5405; GFX10-CU-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x8 5406; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5407; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5408; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5409; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5410; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5411; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5412; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5413; GFX10-CU-NEXT: buffer_gl0_inv 5414; GFX10-CU-NEXT: buffer_gl1_inv 5415; GFX10-CU-NEXT: s_endpgm 5416; 5417; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 5418; SKIP-CACHE-INV: ; %bb.0: ; %entry 5419; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5420; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5421; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5422; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5423; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5424; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5425; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5426; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5427; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5428; SKIP-CACHE-INV-NEXT: s_endpgm 5429; 5430; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 5431; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5432; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5433; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5434; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5435; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5436; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5437; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5438; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5439; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5440; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5441; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5442; 5443; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 5444; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5445; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5446; 
GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5447; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5448; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5449; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5450; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5451; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5452; GFX90A-TGSPLIT-NEXT: buffer_invl2 5453; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5454; GFX90A-TGSPLIT-NEXT: s_endpgm 5455 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5456entry: 5457 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5458 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 5459 ret void 5460} 5461 5462define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( 5463; GFX6-LABEL: global_system_one_as_release_monotonic_cmpxchg: 5464; GFX6: ; %bb.0: ; %entry 5465; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5466; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5467; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5468; GFX6-NEXT: s_mov_b32 s2, -1 5469; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5470; GFX6-NEXT: v_mov_b32_e32 v0, s4 5471; GFX6-NEXT: v_mov_b32_e32 v1, s5 5472; GFX6-NEXT: s_waitcnt vmcnt(0) 5473; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5474; GFX6-NEXT: s_endpgm 5475; 5476; GFX7-LABEL: global_system_one_as_release_monotonic_cmpxchg: 5477; GFX7: ; %bb.0: ; %entry 5478; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5479; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5480; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5481; GFX7-NEXT: s_add_u32 s0, s0, 16 5482; GFX7-NEXT: s_addc_u32 s1, s1, 0 5483; GFX7-NEXT: v_mov_b32_e32 v0, s0 5484; GFX7-NEXT: v_mov_b32_e32 v2, s2 5485; GFX7-NEXT: v_mov_b32_e32 v1, s1 5486; GFX7-NEXT: v_mov_b32_e32 v3, s3 5487; GFX7-NEXT: s_waitcnt vmcnt(0) 5488; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5489; GFX7-NEXT: s_endpgm 5490; 5491; GFX10-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg: 5492; 
GFX10-WGP: ; %bb.0: ; %entry 5493; GFX10-WGP-NEXT: s_clause 0x1 5494; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5495; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5496; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5497; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5498; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5499; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5500; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5501; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5502; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5503; GFX10-WGP-NEXT: s_endpgm 5504; 5505; GFX10-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg: 5506; GFX10-CU: ; %bb.0: ; %entry 5507; GFX10-CU-NEXT: s_clause 0x1 5508; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5509; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5510; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5511; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5512; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5513; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5514; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5515; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5516; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5517; GFX10-CU-NEXT: s_endpgm 5518; 5519; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_cmpxchg: 5520; SKIP-CACHE-INV: ; %bb.0: ; %entry 5521; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5522; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5523; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5524; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5525; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5526; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5527; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5528; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5529; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5530; SKIP-CACHE-INV-NEXT: s_endpgm 5531; 5532; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: 5533; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5534; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5535; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5536; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5537; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5538; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5539; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5540; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5541; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5542; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5543; 5544; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: 5545; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5546; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5547; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5548; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5549; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5550; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5551; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5552; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5553; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5554; GFX90A-TGSPLIT-NEXT: s_endpgm 5555 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5556entry: 5557 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5558 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic 5559 ret void 5560} 5561 5562define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( 5563; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 5564; GFX6: ; %bb.0: ; %entry 5565; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5566; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5567; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5568; GFX6-NEXT: s_mov_b32 s2, -1 5569; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5570; GFX6-NEXT: v_mov_b32_e32 v0, s4 5571; GFX6-NEXT: v_mov_b32_e32 v1, s5 5572; GFX6-NEXT: s_waitcnt vmcnt(0) 5573; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5574; GFX6-NEXT: s_waitcnt vmcnt(0) 5575; GFX6-NEXT: buffer_wbinvl1 5576; GFX6-NEXT: s_endpgm 5577; 
5578; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 5579; GFX7: ; %bb.0: ; %entry 5580; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5581; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5582; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5583; GFX7-NEXT: s_add_u32 s0, s0, 16 5584; GFX7-NEXT: s_addc_u32 s1, s1, 0 5585; GFX7-NEXT: v_mov_b32_e32 v0, s0 5586; GFX7-NEXT: v_mov_b32_e32 v2, s2 5587; GFX7-NEXT: v_mov_b32_e32 v1, s1 5588; GFX7-NEXT: v_mov_b32_e32 v3, s3 5589; GFX7-NEXT: s_waitcnt vmcnt(0) 5590; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5591; GFX7-NEXT: s_waitcnt vmcnt(0) 5592; GFX7-NEXT: buffer_wbinvl1_vol 5593; GFX7-NEXT: s_endpgm 5594; 5595; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 5596; GFX10-WGP: ; %bb.0: ; %entry 5597; GFX10-WGP-NEXT: s_clause 0x1 5598; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5599; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5600; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5601; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5602; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5603; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5604; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5605; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5606; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5607; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5608; GFX10-WGP-NEXT: buffer_gl0_inv 5609; GFX10-WGP-NEXT: buffer_gl1_inv 5610; GFX10-WGP-NEXT: s_endpgm 5611; 5612; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 5613; GFX10-CU: ; %bb.0: ; %entry 5614; GFX10-CU-NEXT: s_clause 0x1 5615; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5616; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5617; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5618; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5619; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5620; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5621; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5622; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5623; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5624; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5625; GFX10-CU-NEXT: buffer_gl0_inv 5626; GFX10-CU-NEXT: buffer_gl1_inv 5627; GFX10-CU-NEXT: s_endpgm 5628; 5629; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 5630; SKIP-CACHE-INV: ; %bb.0: ; %entry 5631; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5632; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5633; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5634; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5635; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5636; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5637; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5638; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5639; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5640; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5641; SKIP-CACHE-INV-NEXT: s_endpgm 5642; 5643; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 5644; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5645; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5646; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5647; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5648; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5649; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5650; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5651; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5652; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5653; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5654; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5655; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5656; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5657; 5658; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 5659; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5660; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5661; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5662; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5663; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5664; GFX90A-TGSPLIT-NEXT: 
v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5665; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5666; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5667; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5668; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5669; GFX90A-TGSPLIT-NEXT: buffer_invl2 5670; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5671; GFX90A-TGSPLIT-NEXT: s_endpgm 5672 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5673entry: 5674 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5675 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 5676 ret void 5677} 5678 5679define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( 5680; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 5681; GFX6: ; %bb.0: ; %entry 5682; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5683; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5684; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5685; GFX6-NEXT: s_mov_b32 s2, -1 5686; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5687; GFX6-NEXT: v_mov_b32_e32 v0, s4 5688; GFX6-NEXT: v_mov_b32_e32 v1, s5 5689; GFX6-NEXT: s_waitcnt vmcnt(0) 5690; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5691; GFX6-NEXT: s_waitcnt vmcnt(0) 5692; GFX6-NEXT: buffer_wbinvl1 5693; GFX6-NEXT: s_endpgm 5694; 5695; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 5696; GFX7: ; %bb.0: ; %entry 5697; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5698; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5699; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5700; GFX7-NEXT: s_add_u32 s0, s0, 16 5701; GFX7-NEXT: s_addc_u32 s1, s1, 0 5702; GFX7-NEXT: v_mov_b32_e32 v0, s0 5703; GFX7-NEXT: v_mov_b32_e32 v2, s2 5704; GFX7-NEXT: v_mov_b32_e32 v1, s1 5705; GFX7-NEXT: v_mov_b32_e32 v3, s3 5706; GFX7-NEXT: s_waitcnt vmcnt(0) 5707; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5708; GFX7-NEXT: s_waitcnt vmcnt(0) 5709; GFX7-NEXT: buffer_wbinvl1_vol 5710; GFX7-NEXT: s_endpgm 5711; 5712; GFX10-WGP-LABEL: 
global_system_one_as_seq_cst_monotonic_cmpxchg: 5713; GFX10-WGP: ; %bb.0: ; %entry 5714; GFX10-WGP-NEXT: s_clause 0x1 5715; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5716; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5717; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5718; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5719; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5720; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5721; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5722; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5723; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5724; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5725; GFX10-WGP-NEXT: buffer_gl0_inv 5726; GFX10-WGP-NEXT: buffer_gl1_inv 5727; GFX10-WGP-NEXT: s_endpgm 5728; 5729; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 5730; GFX10-CU: ; %bb.0: ; %entry 5731; GFX10-CU-NEXT: s_clause 0x1 5732; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5733; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5734; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5735; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5736; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5737; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5738; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5739; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5740; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5741; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5742; GFX10-CU-NEXT: buffer_gl0_inv 5743; GFX10-CU-NEXT: buffer_gl1_inv 5744; GFX10-CU-NEXT: s_endpgm 5745; 5746; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 5747; SKIP-CACHE-INV: ; %bb.0: ; %entry 5748; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5749; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5750; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5751; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5752; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5753; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5754; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5755; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5756; 
SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5757; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5758; SKIP-CACHE-INV-NEXT: s_endpgm 5759; 5760; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 5761; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5762; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5763; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5764; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5765; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5766; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5767; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5768; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5769; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5770; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5771; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5772; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5773; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5774; 5775; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 5776; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5777; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5778; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5779; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5780; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5781; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5782; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5783; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5784; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5785; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5786; GFX90A-TGSPLIT-NEXT: buffer_invl2 5787; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5788; GFX90A-TGSPLIT-NEXT: s_endpgm 5789 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5790entry: 5791 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5792 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 5793 ret void 5794} 5795 5796define amdgpu_kernel void 
@global_system_one_as_acquire_acquire_cmpxchg( 5797; GFX6-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 5798; GFX6: ; %bb.0: ; %entry 5799; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5800; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5801; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5802; GFX6-NEXT: s_mov_b32 s2, -1 5803; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5804; GFX6-NEXT: v_mov_b32_e32 v0, s4 5805; GFX6-NEXT: v_mov_b32_e32 v1, s5 5806; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5807; GFX6-NEXT: s_waitcnt vmcnt(0) 5808; GFX6-NEXT: buffer_wbinvl1 5809; GFX6-NEXT: s_endpgm 5810; 5811; GFX7-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 5812; GFX7: ; %bb.0: ; %entry 5813; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5814; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5815; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5816; GFX7-NEXT: s_add_u32 s0, s0, 16 5817; GFX7-NEXT: s_addc_u32 s1, s1, 0 5818; GFX7-NEXT: v_mov_b32_e32 v0, s0 5819; GFX7-NEXT: v_mov_b32_e32 v2, s2 5820; GFX7-NEXT: v_mov_b32_e32 v1, s1 5821; GFX7-NEXT: v_mov_b32_e32 v3, s3 5822; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5823; GFX7-NEXT: s_waitcnt vmcnt(0) 5824; GFX7-NEXT: buffer_wbinvl1_vol 5825; GFX7-NEXT: s_endpgm 5826; 5827; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 5828; GFX10-WGP: ; %bb.0: ; %entry 5829; GFX10-WGP-NEXT: s_clause 0x1 5830; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5831; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5832; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5833; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5834; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5835; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5836; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5837; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5838; GFX10-WGP-NEXT: buffer_gl0_inv 5839; GFX10-WGP-NEXT: buffer_gl1_inv 5840; GFX10-WGP-NEXT: s_endpgm 5841; 5842; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 5843; GFX10-CU: ; %bb.0: ; %entry 5844; 
GFX10-CU-NEXT: s_clause 0x1 5845; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5846; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5847; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5848; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5849; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5850; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5851; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5852; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5853; GFX10-CU-NEXT: buffer_gl0_inv 5854; GFX10-CU-NEXT: buffer_gl1_inv 5855; GFX10-CU-NEXT: s_endpgm 5856; 5857; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 5858; SKIP-CACHE-INV: ; %bb.0: ; %entry 5859; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5860; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5861; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5862; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5863; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5864; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5865; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5866; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5867; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5868; SKIP-CACHE-INV-NEXT: s_endpgm 5869; 5870; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 5871; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5872; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5873; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5874; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5875; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5876; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5877; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5878; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5879; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5880; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5881; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5882; 5883; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 5884; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5885; 
GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5886; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5887; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5888; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5889; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5890; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5891; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5892; GFX90A-TGSPLIT-NEXT: buffer_invl2 5893; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5894; GFX90A-TGSPLIT-NEXT: s_endpgm 5895 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5896entry: 5897 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5898 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 5899 ret void 5900} 5901 5902define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( 5903; GFX6-LABEL: global_system_one_as_release_acquire_cmpxchg: 5904; GFX6: ; %bb.0: ; %entry 5905; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5906; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 5907; GFX6-NEXT: s_mov_b32 s3, 0x100f000 5908; GFX6-NEXT: s_mov_b32 s2, -1 5909; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5910; GFX6-NEXT: v_mov_b32_e32 v0, s4 5911; GFX6-NEXT: v_mov_b32_e32 v1, s5 5912; GFX6-NEXT: s_waitcnt vmcnt(0) 5913; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5914; GFX6-NEXT: s_waitcnt vmcnt(0) 5915; GFX6-NEXT: buffer_wbinvl1 5916; GFX6-NEXT: s_endpgm 5917; 5918; GFX7-LABEL: global_system_one_as_release_acquire_cmpxchg: 5919; GFX7: ; %bb.0: ; %entry 5920; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5921; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5922; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5923; GFX7-NEXT: s_add_u32 s0, s0, 16 5924; GFX7-NEXT: s_addc_u32 s1, s1, 0 5925; GFX7-NEXT: v_mov_b32_e32 v0, s0 5926; GFX7-NEXT: v_mov_b32_e32 v2, s2 5927; GFX7-NEXT: v_mov_b32_e32 v1, s1 5928; GFX7-NEXT: v_mov_b32_e32 v3, s3 5929; GFX7-NEXT: s_waitcnt vmcnt(0) 5930; GFX7-NEXT: flat_atomic_cmpswap 
v[0:1], v[2:3] 5931; GFX7-NEXT: s_waitcnt vmcnt(0) 5932; GFX7-NEXT: buffer_wbinvl1_vol 5933; GFX7-NEXT: s_endpgm 5934; 5935; GFX10-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg: 5936; GFX10-WGP: ; %bb.0: ; %entry 5937; GFX10-WGP-NEXT: s_clause 0x1 5938; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5939; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5940; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5941; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5942; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5943; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5944; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5945; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5946; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5947; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5948; GFX10-WGP-NEXT: buffer_gl0_inv 5949; GFX10-WGP-NEXT: buffer_gl1_inv 5950; GFX10-WGP-NEXT: s_endpgm 5951; 5952; GFX10-CU-LABEL: global_system_one_as_release_acquire_cmpxchg: 5953; GFX10-CU: ; %bb.0: ; %entry 5954; GFX10-CU-NEXT: s_clause 0x1 5955; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5956; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5957; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5958; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5959; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5960; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5961; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5962; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5963; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 5964; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5965; GFX10-CU-NEXT: buffer_gl0_inv 5966; GFX10-CU-NEXT: buffer_gl1_inv 5967; GFX10-CU-NEXT: s_endpgm 5968; 5969; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg: 5970; SKIP-CACHE-INV: ; %bb.0: ; %entry 5971; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5972; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5973; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5974; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5975; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5976; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s0 5977; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5978; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5979; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5980; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5981; SKIP-CACHE-INV-NEXT: s_endpgm 5982; 5983; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: 5984; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5985; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5986; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 5987; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 5988; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5989; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 5990; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5991; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5992; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 5993; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5994; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5995; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5996; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5997; 5998; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: 5999; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6000; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6001; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6002; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6003; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6004; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6005; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6006; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6007; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6008; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6009; GFX90A-TGSPLIT-NEXT: buffer_invl2 6010; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6011; GFX90A-TGSPLIT-NEXT: s_endpgm 6012 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6013entry: 6014 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6015 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, 
i32 %in syncscope("one-as") release acquire 6016 ret void 6017} 6018 6019define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( 6020; GFX6-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 6021; GFX6: ; %bb.0: ; %entry 6022; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6023; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6024; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6025; GFX6-NEXT: s_mov_b32 s2, -1 6026; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6027; GFX6-NEXT: v_mov_b32_e32 v0, s4 6028; GFX6-NEXT: v_mov_b32_e32 v1, s5 6029; GFX6-NEXT: s_waitcnt vmcnt(0) 6030; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6031; GFX6-NEXT: s_waitcnt vmcnt(0) 6032; GFX6-NEXT: buffer_wbinvl1 6033; GFX6-NEXT: s_endpgm 6034; 6035; GFX7-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 6036; GFX7: ; %bb.0: ; %entry 6037; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6038; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6039; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6040; GFX7-NEXT: s_add_u32 s0, s0, 16 6041; GFX7-NEXT: s_addc_u32 s1, s1, 0 6042; GFX7-NEXT: v_mov_b32_e32 v0, s0 6043; GFX7-NEXT: v_mov_b32_e32 v2, s2 6044; GFX7-NEXT: v_mov_b32_e32 v1, s1 6045; GFX7-NEXT: v_mov_b32_e32 v3, s3 6046; GFX7-NEXT: s_waitcnt vmcnt(0) 6047; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6048; GFX7-NEXT: s_waitcnt vmcnt(0) 6049; GFX7-NEXT: buffer_wbinvl1_vol 6050; GFX7-NEXT: s_endpgm 6051; 6052; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 6053; GFX10-WGP: ; %bb.0: ; %entry 6054; GFX10-WGP-NEXT: s_clause 0x1 6055; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6056; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6057; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6058; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6059; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6060; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6061; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6062; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6063; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6064; 
GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6065; GFX10-WGP-NEXT: buffer_gl0_inv 6066; GFX10-WGP-NEXT: buffer_gl1_inv 6067; GFX10-WGP-NEXT: s_endpgm 6068; 6069; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 6070; GFX10-CU: ; %bb.0: ; %entry 6071; GFX10-CU-NEXT: s_clause 0x1 6072; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6073; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6074; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6075; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6076; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6077; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6078; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6079; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6080; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6081; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6082; GFX10-CU-NEXT: buffer_gl0_inv 6083; GFX10-CU-NEXT: buffer_gl1_inv 6084; GFX10-CU-NEXT: s_endpgm 6085; 6086; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 6087; SKIP-CACHE-INV: ; %bb.0: ; %entry 6088; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6089; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6090; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6091; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6092; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6093; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6094; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6095; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6096; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6097; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6098; SKIP-CACHE-INV-NEXT: s_endpgm 6099; 6100; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 6101; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6102; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6103; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6104; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6105; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6106; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] 
op_sel:[0,1] 6107; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6108; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6109; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6110; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6111; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6112; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6113; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6114; 6115; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 6116; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6117; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6118; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6119; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6120; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6121; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6122; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6123; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6124; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6125; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6126; GFX90A-TGSPLIT-NEXT: buffer_invl2 6127; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6128; GFX90A-TGSPLIT-NEXT: s_endpgm 6129 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6130entry: 6131 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6132 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 6133 ret void 6134} 6135 6136define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( 6137; GFX6-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 6138; GFX6: ; %bb.0: ; %entry 6139; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6140; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6141; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6142; GFX6-NEXT: s_mov_b32 s2, -1 6143; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6144; GFX6-NEXT: v_mov_b32_e32 v0, s4 6145; GFX6-NEXT: v_mov_b32_e32 v1, s5 6146; GFX6-NEXT: s_waitcnt vmcnt(0) 6147; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6148; GFX6-NEXT: s_waitcnt vmcnt(0) 6149; GFX6-NEXT: 
buffer_wbinvl1 6150; GFX6-NEXT: s_endpgm 6151; 6152; GFX7-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 6153; GFX7: ; %bb.0: ; %entry 6154; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6155; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6156; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6157; GFX7-NEXT: s_add_u32 s0, s0, 16 6158; GFX7-NEXT: s_addc_u32 s1, s1, 0 6159; GFX7-NEXT: v_mov_b32_e32 v0, s0 6160; GFX7-NEXT: v_mov_b32_e32 v2, s2 6161; GFX7-NEXT: v_mov_b32_e32 v1, s1 6162; GFX7-NEXT: v_mov_b32_e32 v3, s3 6163; GFX7-NEXT: s_waitcnt vmcnt(0) 6164; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6165; GFX7-NEXT: s_waitcnt vmcnt(0) 6166; GFX7-NEXT: buffer_wbinvl1_vol 6167; GFX7-NEXT: s_endpgm 6168; 6169; GFX10-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 6170; GFX10-WGP: ; %bb.0: ; %entry 6171; GFX10-WGP-NEXT: s_clause 0x1 6172; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6173; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6174; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6175; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6176; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6177; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6178; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6179; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6180; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6181; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6182; GFX10-WGP-NEXT: buffer_gl0_inv 6183; GFX10-WGP-NEXT: buffer_gl1_inv 6184; GFX10-WGP-NEXT: s_endpgm 6185; 6186; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 6187; GFX10-CU: ; %bb.0: ; %entry 6188; GFX10-CU-NEXT: s_clause 0x1 6189; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6190; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6191; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6192; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6193; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6194; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6195; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6196; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6197; GFX10-CU-NEXT: global_atomic_cmpswap v2, 
v[0:1], s[2:3] offset:16 6198; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6199; GFX10-CU-NEXT: buffer_gl0_inv 6200; GFX10-CU-NEXT: buffer_gl1_inv 6201; GFX10-CU-NEXT: s_endpgm 6202; 6203; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 6204; SKIP-CACHE-INV: ; %bb.0: ; %entry 6205; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6206; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6207; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6208; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6209; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6210; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6211; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6212; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6213; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6214; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6215; SKIP-CACHE-INV-NEXT: s_endpgm 6216; 6217; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 6218; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6219; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6220; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6221; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6222; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6223; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6224; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6225; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6226; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6227; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6228; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6229; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6230; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6231; 6232; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 6233; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6234; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6235; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6236; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6237; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6238; 
GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6239; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6240; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6241; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6242; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6243; GFX90A-TGSPLIT-NEXT: buffer_invl2 6244; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6245; GFX90A-TGSPLIT-NEXT: s_endpgm 6246 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6247entry: 6248 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6249 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 6250 ret void 6251} 6252 6253define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( 6254; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 6255; GFX6: ; %bb.0: ; %entry 6256; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6257; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6258; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6259; GFX6-NEXT: s_mov_b32 s2, -1 6260; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6261; GFX6-NEXT: v_mov_b32_e32 v0, s4 6262; GFX6-NEXT: v_mov_b32_e32 v1, s5 6263; GFX6-NEXT: s_waitcnt vmcnt(0) 6264; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 6265; GFX6-NEXT: s_waitcnt vmcnt(0) 6266; GFX6-NEXT: buffer_wbinvl1 6267; GFX6-NEXT: s_endpgm 6268; 6269; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 6270; GFX7: ; %bb.0: ; %entry 6271; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6272; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6273; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6274; GFX7-NEXT: s_add_u32 s0, s0, 16 6275; GFX7-NEXT: s_addc_u32 s1, s1, 0 6276; GFX7-NEXT: v_mov_b32_e32 v0, s0 6277; GFX7-NEXT: v_mov_b32_e32 v2, s2 6278; GFX7-NEXT: v_mov_b32_e32 v1, s1 6279; GFX7-NEXT: v_mov_b32_e32 v3, s3 6280; GFX7-NEXT: s_waitcnt vmcnt(0) 6281; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6282; GFX7-NEXT: s_waitcnt vmcnt(0) 6283; GFX7-NEXT: buffer_wbinvl1_vol 6284; GFX7-NEXT: s_endpgm 6285; 6286; 
GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 6287; GFX10-WGP: ; %bb.0: ; %entry 6288; GFX10-WGP-NEXT: s_clause 0x1 6289; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6290; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6291; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6292; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6293; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6294; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6295; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6296; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6297; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6298; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6299; GFX10-WGP-NEXT: buffer_gl0_inv 6300; GFX10-WGP-NEXT: buffer_gl1_inv 6301; GFX10-WGP-NEXT: s_endpgm 6302; 6303; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 6304; GFX10-CU: ; %bb.0: ; %entry 6305; GFX10-CU-NEXT: s_clause 0x1 6306; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6307; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6308; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6309; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6310; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6311; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6312; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6313; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6314; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 6315; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6316; GFX10-CU-NEXT: buffer_gl0_inv 6317; GFX10-CU-NEXT: buffer_gl1_inv 6318; GFX10-CU-NEXT: s_endpgm 6319; 6320; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 6321; SKIP-CACHE-INV: ; %bb.0: ; %entry 6322; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6323; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6324; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6325; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6326; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6327; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6328; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6329; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6330; 
SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 6331; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6332; SKIP-CACHE-INV-NEXT: s_endpgm 6333; 6334; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 6335; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6336; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6337; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6338; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6339; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6340; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6341; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6342; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6343; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6344; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6345; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6346; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6347; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6348; 6349; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 6350; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6351; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6352; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6353; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6354; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6355; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6356; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6357; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6358; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 6359; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6360; GFX90A-TGSPLIT-NEXT: buffer_invl2 6361; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6362; GFX90A-TGSPLIT-NEXT: s_endpgm 6363 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6364entry: 6365 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6366 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst 6367 ret void 6368} 6369 6370define amdgpu_kernel void 
@global_system_one_as_acquire_monotonic_ret_cmpxchg( 6371; GFX6-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 6372; GFX6: ; %bb.0: ; %entry 6373; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6374; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6375; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6376; GFX6-NEXT: s_mov_b32 s2, -1 6377; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6378; GFX6-NEXT: v_mov_b32_e32 v0, s4 6379; GFX6-NEXT: v_mov_b32_e32 v1, s5 6380; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6381; GFX6-NEXT: s_waitcnt vmcnt(0) 6382; GFX6-NEXT: buffer_wbinvl1 6383; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 6384; GFX6-NEXT: s_endpgm 6385; 6386; GFX7-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 6387; GFX7: ; %bb.0: ; %entry 6388; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6389; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6390; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6391; GFX7-NEXT: s_add_u32 s4, s0, 16 6392; GFX7-NEXT: s_addc_u32 s5, s1, 0 6393; GFX7-NEXT: v_mov_b32_e32 v0, s4 6394; GFX7-NEXT: v_mov_b32_e32 v2, s2 6395; GFX7-NEXT: v_mov_b32_e32 v1, s5 6396; GFX7-NEXT: v_mov_b32_e32 v3, s3 6397; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6398; GFX7-NEXT: s_waitcnt vmcnt(0) 6399; GFX7-NEXT: buffer_wbinvl1_vol 6400; GFX7-NEXT: v_mov_b32_e32 v0, s0 6401; GFX7-NEXT: v_mov_b32_e32 v1, s1 6402; GFX7-NEXT: flat_store_dword v[0:1], v2 6403; GFX7-NEXT: s_endpgm 6404; 6405; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 6406; GFX10-WGP: ; %bb.0: ; %entry 6407; GFX10-WGP-NEXT: s_clause 0x1 6408; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6409; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6410; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6411; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6412; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6413; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6414; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6415; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6416; 
GFX10-WGP-NEXT: buffer_gl0_inv 6417; GFX10-WGP-NEXT: buffer_gl1_inv 6418; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6419; GFX10-WGP-NEXT: s_endpgm 6420; 6421; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 6422; GFX10-CU: ; %bb.0: ; %entry 6423; GFX10-CU-NEXT: s_clause 0x1 6424; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6425; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6426; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6427; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6428; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6429; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6430; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6431; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6432; GFX10-CU-NEXT: buffer_gl0_inv 6433; GFX10-CU-NEXT: buffer_gl1_inv 6434; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6435; GFX10-CU-NEXT: s_endpgm 6436; 6437; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 6438; SKIP-CACHE-INV: ; %bb.0: ; %entry 6439; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6440; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6441; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6442; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6443; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6444; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6445; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6446; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6447; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6448; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6449; SKIP-CACHE-INV-NEXT: s_endpgm 6450; 6451; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 6452; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6453; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6454; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6455; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6456; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6457; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], 
s[2:3] op_sel:[0,1] 6458; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6459; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6460; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6461; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6462; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6463; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6464; 6465; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 6466; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6467; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6468; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6469; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6470; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6471; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6472; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6473; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6474; GFX90A-TGSPLIT-NEXT: buffer_invl2 6475; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6476; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6477; GFX90A-TGSPLIT-NEXT: s_endpgm 6478 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6479entry: 6480 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6481 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 6482 %val0 = extractvalue { i32, i1 } %val, 0 6483 store i32 %val0, i32 addrspace(1)* %out, align 4 6484 ret void 6485} 6486 6487define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( 6488; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 6489; GFX6: ; %bb.0: ; %entry 6490; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6491; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6492; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6493; GFX6-NEXT: s_mov_b32 s2, -1 6494; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6495; GFX6-NEXT: v_mov_b32_e32 v0, s4 6496; GFX6-NEXT: v_mov_b32_e32 v1, s5 6497; GFX6-NEXT: s_waitcnt vmcnt(0) 6498; GFX6-NEXT: buffer_atomic_cmpswap 
v[0:1], off, s[0:3], 0 offset:16 glc 6499; GFX6-NEXT: s_waitcnt vmcnt(0) 6500; GFX6-NEXT: buffer_wbinvl1 6501; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 6502; GFX6-NEXT: s_endpgm 6503; 6504; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 6505; GFX7: ; %bb.0: ; %entry 6506; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6507; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6508; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6509; GFX7-NEXT: s_add_u32 s4, s0, 16 6510; GFX7-NEXT: s_addc_u32 s5, s1, 0 6511; GFX7-NEXT: v_mov_b32_e32 v0, s4 6512; GFX7-NEXT: v_mov_b32_e32 v2, s2 6513; GFX7-NEXT: v_mov_b32_e32 v1, s5 6514; GFX7-NEXT: v_mov_b32_e32 v3, s3 6515; GFX7-NEXT: s_waitcnt vmcnt(0) 6516; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6517; GFX7-NEXT: s_waitcnt vmcnt(0) 6518; GFX7-NEXT: buffer_wbinvl1_vol 6519; GFX7-NEXT: v_mov_b32_e32 v0, s0 6520; GFX7-NEXT: v_mov_b32_e32 v1, s1 6521; GFX7-NEXT: flat_store_dword v[0:1], v2 6522; GFX7-NEXT: s_endpgm 6523; 6524; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 6525; GFX10-WGP: ; %bb.0: ; %entry 6526; GFX10-WGP-NEXT: s_clause 0x1 6527; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6528; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6529; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6530; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6531; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6532; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6533; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6534; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6535; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6536; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6537; GFX10-WGP-NEXT: buffer_gl0_inv 6538; GFX10-WGP-NEXT: buffer_gl1_inv 6539; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6540; GFX10-WGP-NEXT: s_endpgm 6541; 6542; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 6543; GFX10-CU: ; %bb.0: ; %entry 6544; GFX10-CU-NEXT: s_clause 0x1 6545; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6546; 
GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6547; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6548; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6549; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6550; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6551; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6552; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6553; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6554; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6555; GFX10-CU-NEXT: buffer_gl0_inv 6556; GFX10-CU-NEXT: buffer_gl1_inv 6557; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6558; GFX10-CU-NEXT: s_endpgm 6559; 6560; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 6561; SKIP-CACHE-INV: ; %bb.0: ; %entry 6562; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6563; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6564; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6565; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6566; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6567; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6568; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6569; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6570; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6571; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6572; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6573; SKIP-CACHE-INV-NEXT: s_endpgm 6574; 6575; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 6576; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6577; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6578; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6579; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6580; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6581; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6582; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6583; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6584; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6585; GFX90A-NOTTGSPLIT-NEXT: 
s_waitcnt vmcnt(0) 6586; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6587; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6588; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6589; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6590; 6591; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 6592; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6593; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6594; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6595; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6596; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6597; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6598; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6599; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6600; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6601; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6602; GFX90A-TGSPLIT-NEXT: buffer_invl2 6603; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6604; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6605; GFX90A-TGSPLIT-NEXT: s_endpgm 6606 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6607entry: 6608 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6609 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 6610 %val0 = extractvalue { i32, i1 } %val, 0 6611 store i32 %val0, i32 addrspace(1)* %out, align 4 6612 ret void 6613} 6614 6615define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( 6616; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 6617; GFX6: ; %bb.0: ; %entry 6618; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6619; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6620; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6621; GFX6-NEXT: s_mov_b32 s2, -1 6622; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6623; GFX6-NEXT: v_mov_b32_e32 v0, s4 6624; GFX6-NEXT: v_mov_b32_e32 v1, s5 6625; GFX6-NEXT: s_waitcnt vmcnt(0) 6626; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6627; GFX6-NEXT: 
s_waitcnt vmcnt(0) 6628; GFX6-NEXT: buffer_wbinvl1 6629; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 6630; GFX6-NEXT: s_endpgm 6631; 6632; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 6633; GFX7: ; %bb.0: ; %entry 6634; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6635; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6636; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6637; GFX7-NEXT: s_add_u32 s4, s0, 16 6638; GFX7-NEXT: s_addc_u32 s5, s1, 0 6639; GFX7-NEXT: v_mov_b32_e32 v0, s4 6640; GFX7-NEXT: v_mov_b32_e32 v2, s2 6641; GFX7-NEXT: v_mov_b32_e32 v1, s5 6642; GFX7-NEXT: v_mov_b32_e32 v3, s3 6643; GFX7-NEXT: s_waitcnt vmcnt(0) 6644; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6645; GFX7-NEXT: s_waitcnt vmcnt(0) 6646; GFX7-NEXT: buffer_wbinvl1_vol 6647; GFX7-NEXT: v_mov_b32_e32 v0, s0 6648; GFX7-NEXT: v_mov_b32_e32 v1, s1 6649; GFX7-NEXT: flat_store_dword v[0:1], v2 6650; GFX7-NEXT: s_endpgm 6651; 6652; GFX10-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 6653; GFX10-WGP: ; %bb.0: ; %entry 6654; GFX10-WGP-NEXT: s_clause 0x1 6655; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6656; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6657; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6658; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6659; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6660; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6661; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6662; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6663; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6664; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6665; GFX10-WGP-NEXT: buffer_gl0_inv 6666; GFX10-WGP-NEXT: buffer_gl1_inv 6667; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6668; GFX10-WGP-NEXT: s_endpgm 6669; 6670; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 6671; GFX10-CU: ; %bb.0: ; %entry 6672; GFX10-CU-NEXT: s_clause 0x1 6673; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6674; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6675; 
GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6676; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6677; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6678; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6679; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6680; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6681; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6682; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6683; GFX10-CU-NEXT: buffer_gl0_inv 6684; GFX10-CU-NEXT: buffer_gl1_inv 6685; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6686; GFX10-CU-NEXT: s_endpgm 6687; 6688; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 6689; SKIP-CACHE-INV: ; %bb.0: ; %entry 6690; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6691; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6692; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6693; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6694; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6695; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6696; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6697; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6698; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6699; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6700; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6701; SKIP-CACHE-INV-NEXT: s_endpgm 6702; 6703; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 6704; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6705; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6706; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6707; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6708; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6709; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6710; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6711; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6712; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6713; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6714; GFX90A-NOTTGSPLIT-NEXT: 
buffer_invl2 6715; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6716; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6717; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6718; 6719; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 6720; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6721; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6722; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6723; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6724; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6725; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6726; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6727; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6728; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6729; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6730; GFX90A-TGSPLIT-NEXT: buffer_invl2 6731; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6732; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6733; GFX90A-TGSPLIT-NEXT: s_endpgm 6734 i32 addrspace(1)* %out, i32 %in, i32 %old) { 6735entry: 6736 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 6737 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 6738 %val0 = extractvalue { i32, i1 } %val, 0 6739 store i32 %val0, i32 addrspace(1)* %out, align 4 6740 ret void 6741} 6742 6743define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( 6744; GFX6-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 6745; GFX6: ; %bb.0: ; %entry 6746; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6747; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 6748; GFX6-NEXT: s_mov_b32 s3, 0x100f000 6749; GFX6-NEXT: s_mov_b32 s2, -1 6750; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6751; GFX6-NEXT: v_mov_b32_e32 v0, s4 6752; GFX6-NEXT: v_mov_b32_e32 v1, s5 6753; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6754; GFX6-NEXT: s_waitcnt vmcnt(0) 6755; GFX6-NEXT: buffer_wbinvl1 6756; GFX6-NEXT: buffer_store_dword v0, 
off, s[0:3], 0 6757; GFX6-NEXT: s_endpgm 6758; 6759; GFX7-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 6760; GFX7: ; %bb.0: ; %entry 6761; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6762; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 6763; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6764; GFX7-NEXT: s_add_u32 s4, s0, 16 6765; GFX7-NEXT: s_addc_u32 s5, s1, 0 6766; GFX7-NEXT: v_mov_b32_e32 v0, s4 6767; GFX7-NEXT: v_mov_b32_e32 v2, s2 6768; GFX7-NEXT: v_mov_b32_e32 v1, s5 6769; GFX7-NEXT: v_mov_b32_e32 v3, s3 6770; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6771; GFX7-NEXT: s_waitcnt vmcnt(0) 6772; GFX7-NEXT: buffer_wbinvl1_vol 6773; GFX7-NEXT: v_mov_b32_e32 v0, s0 6774; GFX7-NEXT: v_mov_b32_e32 v1, s1 6775; GFX7-NEXT: flat_store_dword v[0:1], v2 6776; GFX7-NEXT: s_endpgm 6777; 6778; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 6779; GFX10-WGP: ; %bb.0: ; %entry 6780; GFX10-WGP-NEXT: s_clause 0x1 6781; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6782; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6783; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 6784; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6785; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 6786; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 6787; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 6788; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6789; GFX10-WGP-NEXT: buffer_gl0_inv 6790; GFX10-WGP-NEXT: buffer_gl1_inv 6791; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 6792; GFX10-WGP-NEXT: s_endpgm 6793; 6794; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 6795; GFX10-CU: ; %bb.0: ; %entry 6796; GFX10-CU-NEXT: s_clause 0x1 6797; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 6798; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 6799; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 6800; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6801; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 6802; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 6803; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], 
s[2:3] offset:16 glc 6804; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6805; GFX10-CU-NEXT: buffer_gl0_inv 6806; GFX10-CU-NEXT: buffer_gl1_inv 6807; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 6808; GFX10-CU-NEXT: s_endpgm 6809; 6810; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 6811; SKIP-CACHE-INV: ; %bb.0: ; %entry 6812; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 6813; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 6814; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 6815; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 6816; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6817; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6818; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6819; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6820; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6821; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 6822; SKIP-CACHE-INV-NEXT: s_endpgm 6823; 6824; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 6825; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6826; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6827; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6828; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 6829; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6830; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 6831; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc 6832; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6833; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6834; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6835; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 6836; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6837; 6838; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 6839; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6840; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6841; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 6842; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 
v2, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning cmpxchg with syncscope("one-as"), success ordering release and
; failure ordering acquire. The exchange targets the i32 four elements past
; %out (compares against %old, writes %in); field 0 of the result pair (the
; value that was in memory) is then stored to %out.
define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_add_u32 s4, s0, 16
; GFX7-NEXT:    s_addc_u32 s5, s1, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning cmpxchg with syncscope("one-as"), success ordering acq_rel and
; failure ordering acquire. The exchange targets the i32 four elements past
; %out (compares against %old, writes %in); field 0 of the result pair (the
; value that was in memory) is then stored to %out.
define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_add_u32 s4, s0, 16
; GFX7-NEXT:    s_addc_u32 s5, s1, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning cmpxchg with syncscope("one-as"), success ordering seq_cst and
; failure ordering acquire. The exchange targets the i32 four elements past
; %out (compares against %old, writes %in); field 0 of the result pair (the
; value that was in memory) is then stored to %out.
define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_add_u32 s4, s0, 16
; GFX7-NEXT:    s_addc_u32 s5, s1, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning cmpxchg with syncscope("one-as"), success ordering seq_cst and
; failure ordering seq_cst. The exchange targets the i32 four elements past
; %out (compares against %old, writes %in); field 0 of the result pair (the
; value that was in memory) is then stored to %out.
define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_add_u32 s4, s0, 16
; GFX7-NEXT:    s_addc_u32 s5, s1, 0
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_clause 0x1
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_clause 0x1
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x8
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    global_store_dword v2, v0, s[0:1]
; GFX90A-TGSPLIT-NEXT:    s_endpgm
    i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out, align 4
  ret void
}
