; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s

; Purpose of this test: each kernel below performs one atomic operation
; (load, store or atomicrmw xchg) on an addrspace(3) pointer with
; syncscope("singlethread"), and the expected llc output is pinned for the
; seven llc invocations listed above (gfx600, gfx700, gfx1010 with and without
; +cumode, gfx700 on amdpal with -amdgcn-skip-cache-invalidations, and gfx90a
; with and without +tgsplit).
;
; The expectation lines inside each function are generated output; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
;
; In the expectations, the amdpal invocation reads the kernel arguments from
; s[0:1] at offset 0x9, while the amdhsa invocations read them from s[4:5] at
; offset 0x0 (gfx600 uses two s_load_dword, the others one s_load_dwordx2).
;
; NOTE(review): the last two invocations spell the FileCheck prefix option
; with a single dash while the others use two dashes - presumably equivalent,
; confirm before normalizing.

; Atomic load, ordering "unordered": loads i32 from %in and writes the value
; to %out with a plain store. Every configuration expects ds_read_b32, then
; s_waitcnt lgkmcnt(0), then ds_write_b32.
define amdgpu_kernel void @local_singlethread_unordered_load(
; GFX6-LABEL: local_singlethread_unordered_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: ds_read_b32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v1, v0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_unordered_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: ds_read_b32 v0, v0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v1, v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_unordered_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_read_b32 v0, v0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v1, v0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_unordered_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_read_b32 v0, v0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v1, v0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_unordered_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_unordered_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") unordered, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic load, ordering "monotonic": loads i32 from %in and writes the value
; to %out with a plain store. Every configuration expects ds_read_b32, then
; s_waitcnt lgkmcnt(0), then ds_write_b32.
define amdgpu_kernel void @local_singlethread_monotonic_load(
; GFX6-LABEL: local_singlethread_monotonic_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: ds_read_b32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v1, v0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_monotonic_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: ds_read_b32 v0, v0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v1, v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_monotonic_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_read_b32 v0, v0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v1, v0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_monotonic_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_read_b32 v0, v0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v1, v0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_monotonic_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_monotonic_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") monotonic, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic load, ordering "acquire": loads i32 from %in and writes the value
; to %out with a plain store. Every configuration expects ds_read_b32, then
; s_waitcnt lgkmcnt(0), then ds_write_b32.
define amdgpu_kernel void @local_singlethread_acquire_load(
; GFX6-LABEL: local_singlethread_acquire_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: ds_read_b32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v1, v0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_acquire_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: ds_read_b32 v0, v0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v1, v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_acquire_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_read_b32 v0, v0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v1, v0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_acquire_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_read_b32 v0, v0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v1, v0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acquire_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_acquire_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") acquire, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic load, ordering "seq_cst": loads i32 from %in and writes the value
; to %out with a plain store. Every configuration expects ds_read_b32, then
; s_waitcnt lgkmcnt(0), then ds_write_b32.
define amdgpu_kernel void @local_singlethread_seq_cst_load(
; GFX6-LABEL: local_singlethread_seq_cst_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: ds_read_b32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v1, v0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_seq_cst_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: ds_read_b32 v0, v0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v1, v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_seq_cst_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_read_b32 v0, v0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v1, v0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_seq_cst_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_read_b32 v0, v0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v1, v0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread") seq_cst, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic store, ordering "unordered": stores the i32 argument %in to %out.
; Every configuration expects a single ds_write_b32 directly followed by
; s_endpgm.
define amdgpu_kernel void @local_singlethread_unordered_store(
; GFX6-LABEL: local_singlethread_unordered_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v1, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_unordered_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s1
; GFX7-NEXT: v_mov_b32_e32 v1, s0
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_unordered_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_unordered_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_unordered_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_unordered_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") unordered, align 4
  ret void
}

; Atomic store, ordering "monotonic": stores the i32 argument %in to %out.
; Every configuration expects a single ds_write_b32 directly followed by
; s_endpgm.
define amdgpu_kernel void @local_singlethread_monotonic_store(
; GFX6-LABEL: local_singlethread_monotonic_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v1, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_monotonic_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s1
; GFX7-NEXT: v_mov_b32_e32 v1, s0
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_monotonic_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_monotonic_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_monotonic_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_monotonic_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") monotonic, align 4
  ret void
}

; Atomic store, ordering "release": stores the i32 argument %in to %out.
; Every configuration expects a single ds_write_b32 directly followed by
; s_endpgm.
define amdgpu_kernel void @local_singlethread_release_store(
; GFX6-LABEL: local_singlethread_release_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v1, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_release_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s1
; GFX7-NEXT: v_mov_b32_e32 v1, s0
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_release_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_release_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_release_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_release_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_release_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") release, align 4
  ret void
}

; Atomic store, ordering "seq_cst": stores the i32 argument %in to %out.
; Every configuration expects a single ds_write_b32 directly followed by
; s_endpgm.
define amdgpu_kernel void @local_singlethread_seq_cst_store(
; GFX6-LABEL: local_singlethread_seq_cst_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v1, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_seq_cst_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s1
; GFX7-NEXT: v_mov_b32_e32 v1, s0
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_seq_cst_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_seq_cst_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread") seq_cst, align 4
  ret void
}

; Atomic exchange, ordering "monotonic": volatile atomicrmw xchg of %in into
; %out; the returned value %val is unused. Every configuration expects a
; single ds_wrxchg_rtn_b32 directly followed by s_endpgm.
define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw(
; GFX6-LABEL: local_singlethread_monotonic_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_monotonic_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_monotonic_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_monotonic_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_monotonic_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_monotonic_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") monotonic
  ret void
}

; Atomic exchange, ordering "acquire": volatile atomicrmw xchg of %in into
; %out; the returned value %val is unused. Every configuration expects a
; single ds_wrxchg_rtn_b32 directly followed by s_endpgm.
define amdgpu_kernel void @local_singlethread_acquire_atomicrmw(
; GFX6-LABEL: local_singlethread_acquire_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_acquire_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_acquire_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_acquire_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acquire_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_acquire_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acquire
  ret void
}

; Atomic exchange, ordering "release": volatile atomicrmw xchg of %in into
; %out; the returned value %val is unused. Every configuration expects a
; single ds_wrxchg_rtn_b32 directly followed by s_endpgm.
define amdgpu_kernel void @local_singlethread_release_atomicrmw(
; GFX6-LABEL: local_singlethread_release_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_singlethread_release_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_release_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_release_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_release_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_release_atomicrmw:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_release_atomicrmw:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1
; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") release
  ret void
}

define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw(
; GFX6-LABEL: local_singlethread_acq_rel_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0
; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0,
v1 905; GFX6-NEXT: s_endpgm 906; 907; GFX7-LABEL: local_singlethread_acq_rel_atomicrmw: 908; GFX7: ; %bb.0: ; %entry 909; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 910; GFX7-NEXT: s_mov_b32 m0, -1 911; GFX7-NEXT: s_waitcnt lgkmcnt(0) 912; GFX7-NEXT: v_mov_b32_e32 v0, s0 913; GFX7-NEXT: v_mov_b32_e32 v1, s1 914; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 915; GFX7-NEXT: s_endpgm 916; 917; GFX10-WGP-LABEL: local_singlethread_acq_rel_atomicrmw: 918; GFX10-WGP: ; %bb.0: ; %entry 919; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 920; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 921; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 922; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 923; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 924; GFX10-WGP-NEXT: s_endpgm 925; 926; GFX10-CU-LABEL: local_singlethread_acq_rel_atomicrmw: 927; GFX10-CU: ; %bb.0: ; %entry 928; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 929; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 930; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 931; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 932; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 933; GFX10-CU-NEXT: s_endpgm 934; 935; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_atomicrmw: 936; SKIP-CACHE-INV: ; %bb.0: ; %entry 937; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 938; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 939; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 940; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 941; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 942; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 943; SKIP-CACHE-INV-NEXT: s_endpgm 944; 945; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_atomicrmw: 946; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 947; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 948; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 949; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 950; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 951; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 952; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 953; 954; GFX90A-TGSPLIT-LABEL: 
local_singlethread_acq_rel_atomicrmw: 955; GFX90A-TGSPLIT: ; %bb.0: ; %entry 956; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 957; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 958; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 959; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 960; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 961; GFX90A-TGSPLIT-NEXT: s_endpgm 962; 963; 964 i32 addrspace(3)* %out, i32 %in) { 965entry: 966 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acq_rel 967 ret void 968} 969 970define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw( 971; GFX6-LABEL: local_singlethread_seq_cst_atomicrmw: 972; GFX6: ; %bb.0: ; %entry 973; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 974; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 975; GFX6-NEXT: s_mov_b32 m0, -1 976; GFX6-NEXT: s_waitcnt lgkmcnt(0) 977; GFX6-NEXT: v_mov_b32_e32 v0, s0 978; GFX6-NEXT: v_mov_b32_e32 v1, s1 979; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 980; GFX6-NEXT: s_endpgm 981; 982; GFX7-LABEL: local_singlethread_seq_cst_atomicrmw: 983; GFX7: ; %bb.0: ; %entry 984; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 985; GFX7-NEXT: s_mov_b32 m0, -1 986; GFX7-NEXT: s_waitcnt lgkmcnt(0) 987; GFX7-NEXT: v_mov_b32_e32 v0, s0 988; GFX7-NEXT: v_mov_b32_e32 v1, s1 989; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 990; GFX7-NEXT: s_endpgm 991; 992; GFX10-WGP-LABEL: local_singlethread_seq_cst_atomicrmw: 993; GFX10-WGP: ; %bb.0: ; %entry 994; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 995; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 996; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 997; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 998; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 999; GFX10-WGP-NEXT: s_endpgm 1000; 1001; GFX10-CU-LABEL: local_singlethread_seq_cst_atomicrmw: 1002; GFX10-CU: ; %bb.0: ; %entry 1003; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1004; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1005; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1006; GFX10-CU-NEXT: v_mov_b32_e32 
v1, s1 1007; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 1008; GFX10-CU-NEXT: s_endpgm 1009; 1010; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_atomicrmw: 1011; SKIP-CACHE-INV: ; %bb.0: ; %entry 1012; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1013; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1014; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1015; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1016; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1017; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 1018; SKIP-CACHE-INV-NEXT: s_endpgm 1019; 1020; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_atomicrmw: 1021; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1022; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1023; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1024; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1025; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1026; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 1027; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1028; 1029; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_atomicrmw: 1030; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1031; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1032; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1033; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1034; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1035; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 1036; GFX90A-TGSPLIT-NEXT: s_endpgm 1037; 1038; 1039 i32 addrspace(3)* %out, i32 %in) { 1040entry: 1041 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") seq_cst 1042 ret void 1043} 1044 1045define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw( 1046; GFX6-LABEL: local_singlethread_acquire_ret_atomicrmw: 1047; GFX6: ; %bb.0: ; %entry 1048; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 1049; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 1050; GFX6-NEXT: s_mov_b32 m0, -1 1051; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1052; GFX6-NEXT: v_mov_b32_e32 v0, s0 1053; GFX6-NEXT: v_mov_b32_e32 v1, s1 1054; GFX6-NEXT: 
ds_wrxchg_rtn_b32 v1, v0, v1 1055; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1056; GFX6-NEXT: ds_write_b32 v0, v1 1057; GFX6-NEXT: s_endpgm 1058; 1059; GFX7-LABEL: local_singlethread_acquire_ret_atomicrmw: 1060; GFX7: ; %bb.0: ; %entry 1061; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1062; GFX7-NEXT: s_mov_b32 m0, -1 1063; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1064; GFX7-NEXT: v_mov_b32_e32 v0, s0 1065; GFX7-NEXT: v_mov_b32_e32 v1, s1 1066; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1067; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1068; GFX7-NEXT: ds_write_b32 v0, v1 1069; GFX7-NEXT: s_endpgm 1070; 1071; GFX10-WGP-LABEL: local_singlethread_acquire_ret_atomicrmw: 1072; GFX10-WGP: ; %bb.0: ; %entry 1073; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1074; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1075; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1076; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1077; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1078; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1079; GFX10-WGP-NEXT: ds_write_b32 v0, v1 1080; GFX10-WGP-NEXT: s_endpgm 1081; 1082; GFX10-CU-LABEL: local_singlethread_acquire_ret_atomicrmw: 1083; GFX10-CU: ; %bb.0: ; %entry 1084; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1085; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1086; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1087; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1088; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1089; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1090; GFX10-CU-NEXT: ds_write_b32 v0, v1 1091; GFX10-CU-NEXT: s_endpgm 1092; 1093; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_ret_atomicrmw: 1094; SKIP-CACHE-INV: ; %bb.0: ; %entry 1095; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1096; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1097; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1098; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1099; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1100; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1101; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1102; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 
1103; SKIP-CACHE-INV-NEXT: s_endpgm 1104; 1105; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acquire_ret_atomicrmw: 1106; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1107; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1108; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1109; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1110; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1111; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1112; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1113; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 1114; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1115; 1116; GFX90A-TGSPLIT-LABEL: local_singlethread_acquire_ret_atomicrmw: 1117; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1118; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1119; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1120; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1121; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1122; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1123; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1124; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 1125; GFX90A-TGSPLIT-NEXT: s_endpgm 1126; 1127; 1128 i32 addrspace(3)* %out, i32 %in) { 1129entry: 1130 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acquire 1131 store i32 %val, i32 addrspace(3)* %out, align 4 1132 ret void 1133} 1134 1135define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw( 1136; GFX6-LABEL: local_singlethread_acq_rel_ret_atomicrmw: 1137; GFX6: ; %bb.0: ; %entry 1138; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 1139; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 1140; GFX6-NEXT: s_mov_b32 m0, -1 1141; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1142; GFX6-NEXT: v_mov_b32_e32 v0, s0 1143; GFX6-NEXT: v_mov_b32_e32 v1, s1 1144; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1145; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1146; GFX6-NEXT: ds_write_b32 v0, v1 1147; GFX6-NEXT: s_endpgm 1148; 1149; GFX7-LABEL: local_singlethread_acq_rel_ret_atomicrmw: 1150; GFX7: ; %bb.0: ; %entry 1151; GFX7-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x0 1152; GFX7-NEXT: s_mov_b32 m0, -1 1153; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1154; GFX7-NEXT: v_mov_b32_e32 v0, s0 1155; GFX7-NEXT: v_mov_b32_e32 v1, s1 1156; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1157; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1158; GFX7-NEXT: ds_write_b32 v0, v1 1159; GFX7-NEXT: s_endpgm 1160; 1161; GFX10-WGP-LABEL: local_singlethread_acq_rel_ret_atomicrmw: 1162; GFX10-WGP: ; %bb.0: ; %entry 1163; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1164; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1165; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1166; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1167; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1168; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1169; GFX10-WGP-NEXT: ds_write_b32 v0, v1 1170; GFX10-WGP-NEXT: s_endpgm 1171; 1172; GFX10-CU-LABEL: local_singlethread_acq_rel_ret_atomicrmw: 1173; GFX10-CU: ; %bb.0: ; %entry 1174; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1175; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1176; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1177; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1178; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1179; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1180; GFX10-CU-NEXT: ds_write_b32 v0, v1 1181; GFX10-CU-NEXT: s_endpgm 1182; 1183; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_ret_atomicrmw: 1184; SKIP-CACHE-INV: ; %bb.0: ; %entry 1185; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1186; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1187; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1188; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1189; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1190; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1191; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1192; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 1193; SKIP-CACHE-INV-NEXT: s_endpgm 1194; 1195; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_ret_atomicrmw: 1196; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1197; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1198; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1199; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1200; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1201; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1202; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1203; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 1204; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1205; 1206; GFX90A-TGSPLIT-LABEL: local_singlethread_acq_rel_ret_atomicrmw: 1207; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1208; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1209; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1210; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1211; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1212; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1213; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1214; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 1215; GFX90A-TGSPLIT-NEXT: s_endpgm 1216; 1217; 1218 i32 addrspace(3)* %out, i32 %in) { 1219entry: 1220 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") acq_rel 1221 store i32 %val, i32 addrspace(3)* %out, align 4 1222 ret void 1223} 1224 1225define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw( 1226; GFX6-LABEL: local_singlethread_seq_cst_ret_atomicrmw: 1227; GFX6: ; %bb.0: ; %entry 1228; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 1229; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 1230; GFX6-NEXT: s_mov_b32 m0, -1 1231; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1232; GFX6-NEXT: v_mov_b32_e32 v0, s0 1233; GFX6-NEXT: v_mov_b32_e32 v1, s1 1234; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1235; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1236; GFX6-NEXT: ds_write_b32 v0, v1 1237; GFX6-NEXT: s_endpgm 1238; 1239; GFX7-LABEL: local_singlethread_seq_cst_ret_atomicrmw: 1240; GFX7: ; %bb.0: ; %entry 1241; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1242; GFX7-NEXT: s_mov_b32 m0, -1 1243; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1244; GFX7-NEXT: v_mov_b32_e32 v0, s0 1245; GFX7-NEXT: v_mov_b32_e32 v1, s1 1246; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1247; 
GFX7-NEXT: s_waitcnt lgkmcnt(0) 1248; GFX7-NEXT: ds_write_b32 v0, v1 1249; GFX7-NEXT: s_endpgm 1250; 1251; GFX10-WGP-LABEL: local_singlethread_seq_cst_ret_atomicrmw: 1252; GFX10-WGP: ; %bb.0: ; %entry 1253; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1254; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1255; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1256; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1257; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1258; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1259; GFX10-WGP-NEXT: ds_write_b32 v0, v1 1260; GFX10-WGP-NEXT: s_endpgm 1261; 1262; GFX10-CU-LABEL: local_singlethread_seq_cst_ret_atomicrmw: 1263; GFX10-CU: ; %bb.0: ; %entry 1264; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1265; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1266; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1267; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1268; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1269; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1270; GFX10-CU-NEXT: ds_write_b32 v0, v1 1271; GFX10-CU-NEXT: s_endpgm 1272; 1273; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_ret_atomicrmw: 1274; SKIP-CACHE-INV: ; %bb.0: ; %entry 1275; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1276; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1277; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1278; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1279; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1280; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1281; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1282; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 1283; SKIP-CACHE-INV-NEXT: s_endpgm 1284; 1285; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_ret_atomicrmw: 1286; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1287; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1288; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1289; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1290; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1291; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1292; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 1293; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 1294; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1295; 1296; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_ret_atomicrmw: 1297; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1298; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1299; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1300; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1301; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 1302; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1303; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1304; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 1305; GFX90A-TGSPLIT-NEXT: s_endpgm 1306; 1307; 1308 i32 addrspace(3)* %out, i32 %in) { 1309entry: 1310 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread") seq_cst 1311 store i32 %val, i32 addrspace(3)* %out, align 4 1312 ret void 1313} 1314 1315define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg( 1316; GFX6-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: 1317; GFX6: ; %bb.0: ; %entry 1318; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1319; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1320; GFX6-NEXT: s_mov_b32 m0, -1 1321; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1322; GFX6-NEXT: v_mov_b32_e32 v0, s2 1323; GFX6-NEXT: v_mov_b32_e32 v1, s1 1324; GFX6-NEXT: v_mov_b32_e32 v2, s0 1325; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1326; GFX6-NEXT: s_endpgm 1327; 1328; GFX7-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: 1329; GFX7: ; %bb.0: ; %entry 1330; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1331; GFX7-NEXT: s_mov_b32 m0, -1 1332; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1333; GFX7-NEXT: v_mov_b32_e32 v0, s0 1334; GFX7-NEXT: v_mov_b32_e32 v1, s2 1335; GFX7-NEXT: v_mov_b32_e32 v2, s1 1336; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1337; GFX7-NEXT: s_endpgm 1338; 1339; GFX10-WGP-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: 1340; GFX10-WGP: ; %bb.0: ; %entry 1341; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1342; GFX10-WGP-NEXT: 
s_waitcnt lgkmcnt(0) 1343; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1344; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1345; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1346; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1347; GFX10-WGP-NEXT: s_endpgm 1348; 1349; GFX10-CU-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: 1350; GFX10-CU: ; %bb.0: ; %entry 1351; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1352; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1353; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1354; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1355; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1356; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1357; GFX10-CU-NEXT: s_endpgm 1358; 1359; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: 1360; SKIP-CACHE-INV: ; %bb.0: ; %entry 1361; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1362; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1363; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1364; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1365; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1366; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1367; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1368; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1369; SKIP-CACHE-INV-NEXT: s_endpgm 1370; 1371; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: 1372; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1373; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1374; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1375; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1376; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1377; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1378; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1379; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1380; 1381; GFX90A-TGSPLIT-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: 1382; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1383; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1384; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1385; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, s0 1386; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1387; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1388; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1389; GFX90A-TGSPLIT-NEXT: s_endpgm 1390; 1391; 1392 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1393entry: 1394 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1395 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic 1396 ret void 1397} 1398 1399define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg( 1400; GFX6-LABEL: local_singlethread_acquire_monotonic_cmpxchg: 1401; GFX6: ; %bb.0: ; %entry 1402; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1403; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1404; GFX6-NEXT: s_mov_b32 m0, -1 1405; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1406; GFX6-NEXT: v_mov_b32_e32 v0, s2 1407; GFX6-NEXT: v_mov_b32_e32 v1, s1 1408; GFX6-NEXT: v_mov_b32_e32 v2, s0 1409; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1410; GFX6-NEXT: s_endpgm 1411; 1412; GFX7-LABEL: local_singlethread_acquire_monotonic_cmpxchg: 1413; GFX7: ; %bb.0: ; %entry 1414; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1415; GFX7-NEXT: s_mov_b32 m0, -1 1416; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1417; GFX7-NEXT: v_mov_b32_e32 v0, s0 1418; GFX7-NEXT: v_mov_b32_e32 v1, s2 1419; GFX7-NEXT: v_mov_b32_e32 v2, s1 1420; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1421; GFX7-NEXT: s_endpgm 1422; 1423; GFX10-WGP-LABEL: local_singlethread_acquire_monotonic_cmpxchg: 1424; GFX10-WGP: ; %bb.0: ; %entry 1425; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1426; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1427; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1428; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1429; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1430; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1431; GFX10-WGP-NEXT: s_endpgm 1432; 1433; GFX10-CU-LABEL: local_singlethread_acquire_monotonic_cmpxchg: 1434; GFX10-CU: ; %bb.0: ; %entry 1435; GFX10-CU-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 1436; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1437; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1438; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1439; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1440; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1441; GFX10-CU-NEXT: s_endpgm 1442; 1443; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_cmpxchg: 1444; SKIP-CACHE-INV: ; %bb.0: ; %entry 1445; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1446; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1447; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1448; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1449; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1450; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1451; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1452; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1453; SKIP-CACHE-INV-NEXT: s_endpgm 1454; 1455; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acquire_monotonic_cmpxchg: 1456; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1457; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1458; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1459; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1460; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1461; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1462; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1463; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1464; 1465; GFX90A-TGSPLIT-LABEL: local_singlethread_acquire_monotonic_cmpxchg: 1466; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1467; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1468; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1469; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1470; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1471; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1472; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1473; GFX90A-TGSPLIT-NEXT: s_endpgm 1474; 1475; 1476 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1477entry: 1478 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1479 %val = cmpxchg volatile i32 
addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic 1480 ret void 1481} 1482 1483define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg( 1484; GFX6-LABEL: local_singlethread_release_monotonic_cmpxchg: 1485; GFX6: ; %bb.0: ; %entry 1486; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1487; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1488; GFX6-NEXT: s_mov_b32 m0, -1 1489; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1490; GFX6-NEXT: v_mov_b32_e32 v0, s2 1491; GFX6-NEXT: v_mov_b32_e32 v1, s1 1492; GFX6-NEXT: v_mov_b32_e32 v2, s0 1493; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1494; GFX6-NEXT: s_endpgm 1495; 1496; GFX7-LABEL: local_singlethread_release_monotonic_cmpxchg: 1497; GFX7: ; %bb.0: ; %entry 1498; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1499; GFX7-NEXT: s_mov_b32 m0, -1 1500; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1501; GFX7-NEXT: v_mov_b32_e32 v0, s0 1502; GFX7-NEXT: v_mov_b32_e32 v1, s2 1503; GFX7-NEXT: v_mov_b32_e32 v2, s1 1504; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1505; GFX7-NEXT: s_endpgm 1506; 1507; GFX10-WGP-LABEL: local_singlethread_release_monotonic_cmpxchg: 1508; GFX10-WGP: ; %bb.0: ; %entry 1509; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1510; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1511; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1512; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1513; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1514; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1515; GFX10-WGP-NEXT: s_endpgm 1516; 1517; GFX10-CU-LABEL: local_singlethread_release_monotonic_cmpxchg: 1518; GFX10-CU: ; %bb.0: ; %entry 1519; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1520; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1521; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1522; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1523; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1524; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1525; GFX10-CU-NEXT: s_endpgm 1526; 1527; SKIP-CACHE-INV-LABEL: local_singlethread_release_monotonic_cmpxchg: 1528; SKIP-CACHE-INV: 
; %bb.0: ; %entry 1529; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1530; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1531; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1532; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1533; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1534; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1535; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1536; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1537; SKIP-CACHE-INV-NEXT: s_endpgm 1538; 1539; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_release_monotonic_cmpxchg: 1540; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1541; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1542; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1543; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1544; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1545; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1546; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1547; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1548; 1549; GFX90A-TGSPLIT-LABEL: local_singlethread_release_monotonic_cmpxchg: 1550; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1551; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1552; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1553; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1554; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1555; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1556; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1557; GFX90A-TGSPLIT-NEXT: s_endpgm 1558; 1559; 1560 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1561entry: 1562 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1563 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic 1564 ret void 1565} 1566 1567define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg( 1568; GFX6-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: 1569; GFX6: ; %bb.0: ; %entry 1570; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1571; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1572; GFX6-NEXT: 
s_mov_b32 m0, -1 1573; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1574; GFX6-NEXT: v_mov_b32_e32 v0, s2 1575; GFX6-NEXT: v_mov_b32_e32 v1, s1 1576; GFX6-NEXT: v_mov_b32_e32 v2, s0 1577; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1578; GFX6-NEXT: s_endpgm 1579; 1580; GFX7-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: 1581; GFX7: ; %bb.0: ; %entry 1582; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1583; GFX7-NEXT: s_mov_b32 m0, -1 1584; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1585; GFX7-NEXT: v_mov_b32_e32 v0, s0 1586; GFX7-NEXT: v_mov_b32_e32 v1, s2 1587; GFX7-NEXT: v_mov_b32_e32 v2, s1 1588; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1589; GFX7-NEXT: s_endpgm 1590; 1591; GFX10-WGP-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: 1592; GFX10-WGP: ; %bb.0: ; %entry 1593; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1594; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1595; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1596; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1597; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1598; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1599; GFX10-WGP-NEXT: s_endpgm 1600; 1601; GFX10-CU-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: 1602; GFX10-CU: ; %bb.0: ; %entry 1603; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1604; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1605; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1606; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1607; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1608; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1609; GFX10-CU-NEXT: s_endpgm 1610; 1611; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: 1612; SKIP-CACHE-INV: ; %bb.0: ; %entry 1613; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1614; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1615; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1616; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1617; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1618; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1619; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1620; 
SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1621; SKIP-CACHE-INV-NEXT: s_endpgm 1622; 1623; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: 1624; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1625; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1626; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1627; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1628; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1629; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1630; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1631; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1632; 1633; GFX90A-TGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: 1634; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1635; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1636; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1637; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1638; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1639; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1640; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1641; GFX90A-TGSPLIT-NEXT: s_endpgm 1642; 1643; 1644 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1645entry: 1646 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1647 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic 1648 ret void 1649} 1650 1651define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg( 1652; GFX6-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: 1653; GFX6: ; %bb.0: ; %entry 1654; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1655; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1656; GFX6-NEXT: s_mov_b32 m0, -1 1657; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1658; GFX6-NEXT: v_mov_b32_e32 v0, s2 1659; GFX6-NEXT: v_mov_b32_e32 v1, s1 1660; GFX6-NEXT: v_mov_b32_e32 v2, s0 1661; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1662; GFX6-NEXT: s_endpgm 1663; 1664; GFX7-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: 1665; GFX7: ; %bb.0: ; %entry 1666; GFX7-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x0 1667; GFX7-NEXT: s_mov_b32 m0, -1 1668; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1669; GFX7-NEXT: v_mov_b32_e32 v0, s0 1670; GFX7-NEXT: v_mov_b32_e32 v1, s2 1671; GFX7-NEXT: v_mov_b32_e32 v2, s1 1672; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1673; GFX7-NEXT: s_endpgm 1674; 1675; GFX10-WGP-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: 1676; GFX10-WGP: ; %bb.0: ; %entry 1677; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1678; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1679; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1680; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1681; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1682; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1683; GFX10-WGP-NEXT: s_endpgm 1684; 1685; GFX10-CU-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: 1686; GFX10-CU: ; %bb.0: ; %entry 1687; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1688; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1689; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1690; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1691; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1692; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1693; GFX10-CU-NEXT: s_endpgm 1694; 1695; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: 1696; SKIP-CACHE-INV: ; %bb.0: ; %entry 1697; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1698; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1699; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1700; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1701; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1702; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1703; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1704; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1705; SKIP-CACHE-INV-NEXT: s_endpgm 1706; 1707; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: 1708; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1709; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1710; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1711; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1712; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1713; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1714; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1715; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1716; 1717; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: 1718; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1719; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1720; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1721; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1722; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1723; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1724; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1725; GFX90A-TGSPLIT-NEXT: s_endpgm 1726; 1727; 1728 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1729entry: 1730 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1731 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic 1732 ret void 1733} 1734 1735define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg( 1736; GFX6-LABEL: local_singlethread_acquire_acquire_cmpxchg: 1737; GFX6: ; %bb.0: ; %entry 1738; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1739; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1740; GFX6-NEXT: s_mov_b32 m0, -1 1741; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1742; GFX6-NEXT: v_mov_b32_e32 v0, s2 1743; GFX6-NEXT: v_mov_b32_e32 v1, s1 1744; GFX6-NEXT: v_mov_b32_e32 v2, s0 1745; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1746; GFX6-NEXT: s_endpgm 1747; 1748; GFX7-LABEL: local_singlethread_acquire_acquire_cmpxchg: 1749; GFX7: ; %bb.0: ; %entry 1750; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1751; GFX7-NEXT: s_mov_b32 m0, -1 1752; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1753; GFX7-NEXT: v_mov_b32_e32 v0, s0 1754; GFX7-NEXT: v_mov_b32_e32 v1, s2 1755; GFX7-NEXT: v_mov_b32_e32 v2, s1 1756; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1757; GFX7-NEXT: s_endpgm 1758; 1759; GFX10-WGP-LABEL: local_singlethread_acquire_acquire_cmpxchg: 1760; GFX10-WGP: ; %bb.0: ; %entry 1761; 
GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1762; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1763; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1764; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1765; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1766; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1767; GFX10-WGP-NEXT: s_endpgm 1768; 1769; GFX10-CU-LABEL: local_singlethread_acquire_acquire_cmpxchg: 1770; GFX10-CU: ; %bb.0: ; %entry 1771; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1772; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1773; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1774; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1775; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1776; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1777; GFX10-CU-NEXT: s_endpgm 1778; 1779; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_cmpxchg: 1780; SKIP-CACHE-INV: ; %bb.0: ; %entry 1781; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1782; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1783; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1784; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1785; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1786; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1787; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1788; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1789; SKIP-CACHE-INV-NEXT: s_endpgm 1790; 1791; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acquire_acquire_cmpxchg: 1792; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1793; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1794; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1795; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1796; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1797; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1798; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1799; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1800; 1801; GFX90A-TGSPLIT-LABEL: local_singlethread_acquire_acquire_cmpxchg: 1802; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1803; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1804; GFX90A-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 1805; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1806; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1807; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1808; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1809; GFX90A-TGSPLIT-NEXT: s_endpgm 1810; 1811; 1812 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1813entry: 1814 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1815 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire 1816 ret void 1817} 1818 1819define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg( 1820; GFX6-LABEL: local_singlethread_release_acquire_cmpxchg: 1821; GFX6: ; %bb.0: ; %entry 1822; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1823; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1824; GFX6-NEXT: s_mov_b32 m0, -1 1825; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1826; GFX6-NEXT: v_mov_b32_e32 v0, s2 1827; GFX6-NEXT: v_mov_b32_e32 v1, s1 1828; GFX6-NEXT: v_mov_b32_e32 v2, s0 1829; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1830; GFX6-NEXT: s_endpgm 1831; 1832; GFX7-LABEL: local_singlethread_release_acquire_cmpxchg: 1833; GFX7: ; %bb.0: ; %entry 1834; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1835; GFX7-NEXT: s_mov_b32 m0, -1 1836; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1837; GFX7-NEXT: v_mov_b32_e32 v0, s0 1838; GFX7-NEXT: v_mov_b32_e32 v1, s2 1839; GFX7-NEXT: v_mov_b32_e32 v2, s1 1840; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1841; GFX7-NEXT: s_endpgm 1842; 1843; GFX10-WGP-LABEL: local_singlethread_release_acquire_cmpxchg: 1844; GFX10-WGP: ; %bb.0: ; %entry 1845; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1846; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1847; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1848; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1849; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1850; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1851; GFX10-WGP-NEXT: s_endpgm 1852; 1853; GFX10-CU-LABEL: local_singlethread_release_acquire_cmpxchg: 1854; GFX10-CU: ; %bb.0: ; 
%entry 1855; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1856; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1857; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1858; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1859; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1860; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1861; GFX10-CU-NEXT: s_endpgm 1862; 1863; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_cmpxchg: 1864; SKIP-CACHE-INV: ; %bb.0: ; %entry 1865; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1866; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1867; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1868; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1869; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1870; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1871; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1872; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1873; SKIP-CACHE-INV-NEXT: s_endpgm 1874; 1875; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_release_acquire_cmpxchg: 1876; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1877; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1878; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1879; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1880; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1881; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1882; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1883; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1884; 1885; GFX90A-TGSPLIT-LABEL: local_singlethread_release_acquire_cmpxchg: 1886; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1887; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1888; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1889; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1890; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1891; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1892; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1893; GFX90A-TGSPLIT-NEXT: s_endpgm 1894; 1895; 1896 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1897entry: 1898 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1899 %val = 
cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire 1900 ret void 1901} 1902 1903define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg( 1904; GFX6-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: 1905; GFX6: ; %bb.0: ; %entry 1906; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1907; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1908; GFX6-NEXT: s_mov_b32 m0, -1 1909; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1910; GFX6-NEXT: v_mov_b32_e32 v0, s2 1911; GFX6-NEXT: v_mov_b32_e32 v1, s1 1912; GFX6-NEXT: v_mov_b32_e32 v2, s0 1913; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1914; GFX6-NEXT: s_endpgm 1915; 1916; GFX7-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: 1917; GFX7: ; %bb.0: ; %entry 1918; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1919; GFX7-NEXT: s_mov_b32 m0, -1 1920; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1921; GFX7-NEXT: v_mov_b32_e32 v0, s0 1922; GFX7-NEXT: v_mov_b32_e32 v1, s2 1923; GFX7-NEXT: v_mov_b32_e32 v2, s1 1924; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1925; GFX7-NEXT: s_endpgm 1926; 1927; GFX10-WGP-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: 1928; GFX10-WGP: ; %bb.0: ; %entry 1929; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1930; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1931; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1932; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1933; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1934; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1935; GFX10-WGP-NEXT: s_endpgm 1936; 1937; GFX10-CU-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: 1938; GFX10-CU: ; %bb.0: ; %entry 1939; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1940; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1941; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1942; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1943; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1944; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1945; GFX10-CU-NEXT: s_endpgm 1946; 1947; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: 1948; 
SKIP-CACHE-INV: ; %bb.0: ; %entry 1949; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1950; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1951; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1952; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1953; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1954; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1955; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1956; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1957; SKIP-CACHE-INV-NEXT: s_endpgm 1958; 1959; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: 1960; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1961; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1962; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1963; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1964; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1965; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1966; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1967; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1968; 1969; GFX90A-TGSPLIT-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: 1970; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1971; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1972; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1973; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 1974; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1975; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 1976; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1977; GFX90A-TGSPLIT-NEXT: s_endpgm 1978; 1979; 1980 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1981entry: 1982 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1983 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire 1984 ret void 1985} 1986 1987define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg( 1988; GFX6-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: 1989; GFX6: ; %bb.0: ; %entry 1990; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 1991; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 1992; 
GFX6-NEXT: s_mov_b32 m0, -1 1993; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1994; GFX6-NEXT: v_mov_b32_e32 v0, s2 1995; GFX6-NEXT: v_mov_b32_e32 v1, s1 1996; GFX6-NEXT: v_mov_b32_e32 v2, s0 1997; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1998; GFX6-NEXT: s_endpgm 1999; 2000; GFX7-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: 2001; GFX7: ; %bb.0: ; %entry 2002; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2003; GFX7-NEXT: s_mov_b32 m0, -1 2004; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2005; GFX7-NEXT: v_mov_b32_e32 v0, s0 2006; GFX7-NEXT: v_mov_b32_e32 v1, s2 2007; GFX7-NEXT: v_mov_b32_e32 v2, s1 2008; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2009; GFX7-NEXT: s_endpgm 2010; 2011; GFX10-WGP-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: 2012; GFX10-WGP: ; %bb.0: ; %entry 2013; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2014; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2015; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2016; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2017; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2018; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2019; GFX10-WGP-NEXT: s_endpgm 2020; 2021; GFX10-CU-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: 2022; GFX10-CU: ; %bb.0: ; %entry 2023; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2024; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2025; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2026; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2027; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2028; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2029; GFX10-CU-NEXT: s_endpgm 2030; 2031; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: 2032; SKIP-CACHE-INV: ; %bb.0: ; %entry 2033; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2034; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2035; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2036; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2037; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2038; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2039; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2040; 
SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2041; SKIP-CACHE-INV-NEXT: s_endpgm 2042; 2043; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: 2044; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2045; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2046; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2047; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2048; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2049; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2050; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2051; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2052; 2053; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: 2054; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2055; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2056; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2057; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2058; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2059; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2060; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2061; GFX90A-TGSPLIT-NEXT: s_endpgm 2062; 2063; 2064 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2065entry: 2066 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2067 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire 2068 ret void 2069} 2070 2071define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg( 2072; GFX6-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: 2073; GFX6: ; %bb.0: ; %entry 2074; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2075; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2076; GFX6-NEXT: s_mov_b32 m0, -1 2077; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2078; GFX6-NEXT: v_mov_b32_e32 v0, s2 2079; GFX6-NEXT: v_mov_b32_e32 v1, s1 2080; GFX6-NEXT: v_mov_b32_e32 v2, s0 2081; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2082; GFX6-NEXT: s_endpgm 2083; 2084; GFX7-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: 2085; GFX7: ; %bb.0: ; %entry 2086; GFX7-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x0 2087; GFX7-NEXT: s_mov_b32 m0, -1 2088; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2089; GFX7-NEXT: v_mov_b32_e32 v0, s0 2090; GFX7-NEXT: v_mov_b32_e32 v1, s2 2091; GFX7-NEXT: v_mov_b32_e32 v2, s1 2092; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2093; GFX7-NEXT: s_endpgm 2094; 2095; GFX10-WGP-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: 2096; GFX10-WGP: ; %bb.0: ; %entry 2097; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2098; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2099; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2100; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2101; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2102; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2103; GFX10-WGP-NEXT: s_endpgm 2104; 2105; GFX10-CU-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: 2106; GFX10-CU: ; %bb.0: ; %entry 2107; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2108; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2109; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2110; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2111; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2112; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2113; GFX10-CU-NEXT: s_endpgm 2114; 2115; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: 2116; SKIP-CACHE-INV: ; %bb.0: ; %entry 2117; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2118; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2119; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2120; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2121; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2122; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2123; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2124; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2125; SKIP-CACHE-INV-NEXT: s_endpgm 2126; 2127; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: 2128; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2129; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2130; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2131; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2132; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2133; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2134; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2135; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2136; 2137; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: 2138; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2139; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2140; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2141; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2142; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2143; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2144; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 2145; GFX90A-TGSPLIT-NEXT: s_endpgm 2146; 2147; 2148 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2149entry: 2150 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2151 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst 2152 ret void 2153} 2154 2155define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg( 2156; GFX6-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: 2157; GFX6: ; %bb.0: ; %entry 2158; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2159; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2160; GFX6-NEXT: s_mov_b32 m0, -1 2161; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2162; GFX6-NEXT: v_mov_b32_e32 v0, s2 2163; GFX6-NEXT: v_mov_b32_e32 v1, s1 2164; GFX6-NEXT: v_mov_b32_e32 v2, s0 2165; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2166; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2167; GFX6-NEXT: ds_write_b32 v0, v1 2168; GFX6-NEXT: s_endpgm 2169; 2170; GFX7-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: 2171; GFX7: ; %bb.0: ; %entry 2172; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2173; GFX7-NEXT: s_mov_b32 m0, -1 2174; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2175; GFX7-NEXT: v_mov_b32_e32 v0, s0 2176; GFX7-NEXT: v_mov_b32_e32 v1, s2 2177; GFX7-NEXT: v_mov_b32_e32 v2, s1 2178; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2179; GFX7-NEXT: s_waitcnt 
lgkmcnt(0) 2180; GFX7-NEXT: ds_write_b32 v0, v1 2181; GFX7-NEXT: s_endpgm 2182; 2183; GFX10-WGP-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: 2184; GFX10-WGP: ; %bb.0: ; %entry 2185; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2186; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2187; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2188; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2189; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2190; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2191; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2192; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2193; GFX10-WGP-NEXT: s_endpgm 2194; 2195; GFX10-CU-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: 2196; GFX10-CU: ; %bb.0: ; %entry 2197; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2198; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2199; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2200; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2201; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2202; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2203; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2204; GFX10-CU-NEXT: ds_write_b32 v0, v1 2205; GFX10-CU-NEXT: s_endpgm 2206; 2207; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: 2208; SKIP-CACHE-INV: ; %bb.0: ; %entry 2209; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2210; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2211; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2212; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2213; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2214; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2215; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2216; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2217; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2218; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2219; SKIP-CACHE-INV-NEXT: s_endpgm 2220; 2221; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: 2222; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2223; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2224; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2225; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2226; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2227; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2228; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2229; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2230; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 2231; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2232; 2233; GFX90A-TGSPLIT-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: 2234; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2235; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2236; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2237; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2238; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2239; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2240; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2241; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2242; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2243; GFX90A-TGSPLIT-NEXT: s_endpgm 2244; 2245; 2246 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2247entry: 2248 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2249 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic 2250 %val0 = extractvalue { i32, i1 } %val, 0 2251 store i32 %val0, i32 addrspace(3)* %out, align 4 2252 ret void 2253} 2254 2255define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg( 2256; GFX6-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: 2257; GFX6: ; %bb.0: ; %entry 2258; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2259; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2260; GFX6-NEXT: s_mov_b32 m0, -1 2261; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2262; GFX6-NEXT: v_mov_b32_e32 v0, s2 2263; GFX6-NEXT: v_mov_b32_e32 v1, s1 2264; GFX6-NEXT: v_mov_b32_e32 v2, s0 2265; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2266; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2267; GFX6-NEXT: ds_write_b32 v0, v1 2268; GFX6-NEXT: s_endpgm 2269; 2270; 
GFX7-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: 2271; GFX7: ; %bb.0: ; %entry 2272; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2273; GFX7-NEXT: s_mov_b32 m0, -1 2274; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2275; GFX7-NEXT: v_mov_b32_e32 v0, s0 2276; GFX7-NEXT: v_mov_b32_e32 v1, s2 2277; GFX7-NEXT: v_mov_b32_e32 v2, s1 2278; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2279; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2280; GFX7-NEXT: ds_write_b32 v0, v1 2281; GFX7-NEXT: s_endpgm 2282; 2283; GFX10-WGP-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: 2284; GFX10-WGP: ; %bb.0: ; %entry 2285; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2286; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2287; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2288; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2289; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2290; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2291; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2292; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2293; GFX10-WGP-NEXT: s_endpgm 2294; 2295; GFX10-CU-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: 2296; GFX10-CU: ; %bb.0: ; %entry 2297; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2298; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2299; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2300; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2301; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2302; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2303; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2304; GFX10-CU-NEXT: ds_write_b32 v0, v1 2305; GFX10-CU-NEXT: s_endpgm 2306; 2307; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: 2308; SKIP-CACHE-INV: ; %bb.0: ; %entry 2309; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2310; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2311; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2312; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2313; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2314; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2315; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, 
s3 2316; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2317; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2318; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2319; SKIP-CACHE-INV-NEXT: s_endpgm 2320; 2321; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: 2322; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2323; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2324; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2325; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2326; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2327; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2328; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2329; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2330; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 2331; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2332; 2333; GFX90A-TGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: 2334; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2335; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2336; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2337; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2338; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2339; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2340; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2341; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2342; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2343; GFX90A-TGSPLIT-NEXT: s_endpgm 2344; 2345; 2346 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2347entry: 2348 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2349 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic 2350 %val0 = extractvalue { i32, i1 } %val, 0 2351 store i32 %val0, i32 addrspace(3)* %out, align 4 2352 ret void 2353} 2354 2355define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg( 2356; GFX6-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: 2357; GFX6: ; %bb.0: ; %entry 2358; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2359; 
GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2360; GFX6-NEXT: s_mov_b32 m0, -1 2361; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2362; GFX6-NEXT: v_mov_b32_e32 v0, s2 2363; GFX6-NEXT: v_mov_b32_e32 v1, s1 2364; GFX6-NEXT: v_mov_b32_e32 v2, s0 2365; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2366; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2367; GFX6-NEXT: ds_write_b32 v0, v1 2368; GFX6-NEXT: s_endpgm 2369; 2370; GFX7-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: 2371; GFX7: ; %bb.0: ; %entry 2372; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2373; GFX7-NEXT: s_mov_b32 m0, -1 2374; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2375; GFX7-NEXT: v_mov_b32_e32 v0, s0 2376; GFX7-NEXT: v_mov_b32_e32 v1, s2 2377; GFX7-NEXT: v_mov_b32_e32 v2, s1 2378; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2379; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2380; GFX7-NEXT: ds_write_b32 v0, v1 2381; GFX7-NEXT: s_endpgm 2382; 2383; GFX10-WGP-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: 2384; GFX10-WGP: ; %bb.0: ; %entry 2385; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2386; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2387; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2388; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2389; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2390; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2391; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2392; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2393; GFX10-WGP-NEXT: s_endpgm 2394; 2395; GFX10-CU-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: 2396; GFX10-CU: ; %bb.0: ; %entry 2397; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2398; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2399; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2400; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2401; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2402; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2403; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2404; GFX10-CU-NEXT: ds_write_b32 v0, v1 2405; GFX10-CU-NEXT: s_endpgm 2406; 2407; SKIP-CACHE-INV-LABEL: 
local_singlethread_seq_cst_monotonic_ret_cmpxchg: 2408; SKIP-CACHE-INV: ; %bb.0: ; %entry 2409; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2410; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2411; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2412; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2413; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2414; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2415; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2416; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2417; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2418; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2419; SKIP-CACHE-INV-NEXT: s_endpgm 2420; 2421; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: 2422; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2423; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2424; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2425; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2426; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2427; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2428; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2429; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2430; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 2431; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2432; 2433; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: 2434; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2435; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2436; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2437; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2438; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2439; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2440; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2441; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2442; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2443; GFX90A-TGSPLIT-NEXT: s_endpgm 2444; 2445; 2446 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2447entry: 2448 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2449 %val = cmpxchg volatile i32 
addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic 2450 %val0 = extractvalue { i32, i1 } %val, 0 2451 store i32 %val0, i32 addrspace(3)* %out, align 4 2452 ret void 2453} 2454 2455define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg( 2456; GFX6-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: 2457; GFX6: ; %bb.0: ; %entry 2458; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2459; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2460; GFX6-NEXT: s_mov_b32 m0, -1 2461; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2462; GFX6-NEXT: v_mov_b32_e32 v0, s2 2463; GFX6-NEXT: v_mov_b32_e32 v1, s1 2464; GFX6-NEXT: v_mov_b32_e32 v2, s0 2465; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2466; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2467; GFX6-NEXT: ds_write_b32 v0, v1 2468; GFX6-NEXT: s_endpgm 2469; 2470; GFX7-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: 2471; GFX7: ; %bb.0: ; %entry 2472; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2473; GFX7-NEXT: s_mov_b32 m0, -1 2474; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2475; GFX7-NEXT: v_mov_b32_e32 v0, s0 2476; GFX7-NEXT: v_mov_b32_e32 v1, s2 2477; GFX7-NEXT: v_mov_b32_e32 v2, s1 2478; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2479; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2480; GFX7-NEXT: ds_write_b32 v0, v1 2481; GFX7-NEXT: s_endpgm 2482; 2483; GFX10-WGP-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: 2484; GFX10-WGP: ; %bb.0: ; %entry 2485; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2486; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2487; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2488; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2489; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2490; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2491; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2492; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2493; GFX10-WGP-NEXT: s_endpgm 2494; 2495; GFX10-CU-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: 2496; GFX10-CU: ; %bb.0: ; %entry 2497; GFX10-CU-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x0 2498; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2499; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2500; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2501; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2502; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2503; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2504; GFX10-CU-NEXT: ds_write_b32 v0, v1 2505; GFX10-CU-NEXT: s_endpgm 2506; 2507; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: 2508; SKIP-CACHE-INV: ; %bb.0: ; %entry 2509; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2510; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2511; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2512; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2513; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2514; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2515; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2516; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2517; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2518; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2519; SKIP-CACHE-INV-NEXT: s_endpgm 2520; 2521; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: 2522; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2523; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2524; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2525; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2526; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2527; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2528; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2529; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2530; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 2531; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2532; 2533; GFX90A-TGSPLIT-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: 2534; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2535; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2536; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2537; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2538; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2539; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2540; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2541; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2542; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2543; GFX90A-TGSPLIT-NEXT: s_endpgm 2544; 2545; 2546 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2547entry: 2548 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2549 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire 2550 %val0 = extractvalue { i32, i1 } %val, 0 2551 store i32 %val0, i32 addrspace(3)* %out, align 4 2552 ret void 2553} 2554 2555define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg( 2556; GFX6-LABEL: local_singlethread_release_acquire_ret_cmpxchg: 2557; GFX6: ; %bb.0: ; %entry 2558; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2559; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2560; GFX6-NEXT: s_mov_b32 m0, -1 2561; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2562; GFX6-NEXT: v_mov_b32_e32 v0, s2 2563; GFX6-NEXT: v_mov_b32_e32 v1, s1 2564; GFX6-NEXT: v_mov_b32_e32 v2, s0 2565; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2566; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2567; GFX6-NEXT: ds_write_b32 v0, v1 2568; GFX6-NEXT: s_endpgm 2569; 2570; GFX7-LABEL: local_singlethread_release_acquire_ret_cmpxchg: 2571; GFX7: ; %bb.0: ; %entry 2572; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2573; GFX7-NEXT: s_mov_b32 m0, -1 2574; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2575; GFX7-NEXT: v_mov_b32_e32 v0, s0 2576; GFX7-NEXT: v_mov_b32_e32 v1, s2 2577; GFX7-NEXT: v_mov_b32_e32 v2, s1 2578; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2579; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2580; GFX7-NEXT: ds_write_b32 v0, v1 2581; GFX7-NEXT: s_endpgm 2582; 2583; GFX10-WGP-LABEL: local_singlethread_release_acquire_ret_cmpxchg: 2584; GFX10-WGP: ; %bb.0: ; %entry 2585; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2586; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2587; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2588; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2589; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2590; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2591; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2592; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2593; GFX10-WGP-NEXT: s_endpgm 2594; 2595; GFX10-CU-LABEL: local_singlethread_release_acquire_ret_cmpxchg: 2596; GFX10-CU: ; %bb.0: ; %entry 2597; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2598; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2599; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2600; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2601; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2602; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2603; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2604; GFX10-CU-NEXT: ds_write_b32 v0, v1 2605; GFX10-CU-NEXT: s_endpgm 2606; 2607; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_ret_cmpxchg: 2608; SKIP-CACHE-INV: ; %bb.0: ; %entry 2609; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2610; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2611; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2612; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2613; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2614; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2615; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2616; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2617; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2618; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2619; SKIP-CACHE-INV-NEXT: s_endpgm 2620; 2621; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_release_acquire_ret_cmpxchg: 2622; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2623; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2624; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2625; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2626; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2627; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2628; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2629; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2630; GFX90A-NOTTGSPLIT-NEXT: 
ds_write_b32 v0, v1 2631; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2632; 2633; GFX90A-TGSPLIT-LABEL: local_singlethread_release_acquire_ret_cmpxchg: 2634; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2635; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2636; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2637; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2638; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2639; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2640; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2641; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2642; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2643; GFX90A-TGSPLIT-NEXT: s_endpgm 2644; 2645; 2646 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2647entry: 2648 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2649 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire 2650 %val0 = extractvalue { i32, i1 } %val, 0 2651 store i32 %val0, i32 addrspace(3)* %out, align 4 2652 ret void 2653} 2654 2655define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg( 2656; GFX6-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: 2657; GFX6: ; %bb.0: ; %entry 2658; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2659; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2660; GFX6-NEXT: s_mov_b32 m0, -1 2661; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2662; GFX6-NEXT: v_mov_b32_e32 v0, s2 2663; GFX6-NEXT: v_mov_b32_e32 v1, s1 2664; GFX6-NEXT: v_mov_b32_e32 v2, s0 2665; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2666; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2667; GFX6-NEXT: ds_write_b32 v0, v1 2668; GFX6-NEXT: s_endpgm 2669; 2670; GFX7-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: 2671; GFX7: ; %bb.0: ; %entry 2672; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2673; GFX7-NEXT: s_mov_b32 m0, -1 2674; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2675; GFX7-NEXT: v_mov_b32_e32 v0, s0 2676; GFX7-NEXT: v_mov_b32_e32 v1, s2 2677; GFX7-NEXT: v_mov_b32_e32 v2, s1 2678; GFX7-NEXT: ds_cmpst_rtn_b32 
v1, v0, v1, v2 offset:16 2679; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2680; GFX7-NEXT: ds_write_b32 v0, v1 2681; GFX7-NEXT: s_endpgm 2682; 2683; GFX10-WGP-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: 2684; GFX10-WGP: ; %bb.0: ; %entry 2685; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2686; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2687; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2688; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2689; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2690; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2691; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2692; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2693; GFX10-WGP-NEXT: s_endpgm 2694; 2695; GFX10-CU-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: 2696; GFX10-CU: ; %bb.0: ; %entry 2697; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2698; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2699; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2700; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2701; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2702; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2703; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2704; GFX10-CU-NEXT: ds_write_b32 v0, v1 2705; GFX10-CU-NEXT: s_endpgm 2706; 2707; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: 2708; SKIP-CACHE-INV: ; %bb.0: ; %entry 2709; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2710; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2711; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2712; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2713; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2714; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2715; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2716; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2717; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2718; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2719; SKIP-CACHE-INV-NEXT: s_endpgm 2720; 2721; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: 2722; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2723; GFX90A-NOTTGSPLIT-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 2724; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2725; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2726; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2727; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2728; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2729; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2730; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 2731; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2732; 2733; GFX90A-TGSPLIT-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: 2734; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2735; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2736; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2737; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2738; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2739; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2740; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2741; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2742; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2743; GFX90A-TGSPLIT-NEXT: s_endpgm 2744; 2745; 2746 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2747entry: 2748 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2749 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire 2750 %val0 = extractvalue { i32, i1 } %val, 0 2751 store i32 %val0, i32 addrspace(3)* %out, align 4 2752 ret void 2753} 2754 2755define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg( 2756; GFX6-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: 2757; GFX6: ; %bb.0: ; %entry 2758; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 2759; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2760; GFX6-NEXT: s_mov_b32 m0, -1 2761; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2762; GFX6-NEXT: v_mov_b32_e32 v0, s2 2763; GFX6-NEXT: v_mov_b32_e32 v1, s1 2764; GFX6-NEXT: v_mov_b32_e32 v2, s0 2765; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2766; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2767; GFX6-NEXT: ds_write_b32 v0, v1 2768; 
GFX6-NEXT: s_endpgm 2769; 2770; GFX7-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: 2771; GFX7: ; %bb.0: ; %entry 2772; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2773; GFX7-NEXT: s_mov_b32 m0, -1 2774; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2775; GFX7-NEXT: v_mov_b32_e32 v0, s0 2776; GFX7-NEXT: v_mov_b32_e32 v1, s2 2777; GFX7-NEXT: v_mov_b32_e32 v2, s1 2778; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2779; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2780; GFX7-NEXT: ds_write_b32 v0, v1 2781; GFX7-NEXT: s_endpgm 2782; 2783; GFX10-WGP-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: 2784; GFX10-WGP: ; %bb.0: ; %entry 2785; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2786; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2787; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2788; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2789; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2790; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2791; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2792; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2793; GFX10-WGP-NEXT: s_endpgm 2794; 2795; GFX10-CU-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: 2796; GFX10-CU: ; %bb.0: ; %entry 2797; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2798; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2799; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2800; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2801; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2802; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2803; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2804; GFX10-CU-NEXT: ds_write_b32 v0, v1 2805; GFX10-CU-NEXT: s_endpgm 2806; 2807; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: 2808; SKIP-CACHE-INV: ; %bb.0: ; %entry 2809; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2810; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2811; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2812; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2813; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2814; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2815; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2816; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2817; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2818; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2819; SKIP-CACHE-INV-NEXT: s_endpgm 2820; 2821; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: 2822; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2823; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2824; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2825; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2826; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2827; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2828; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2829; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2830; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 2831; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2832; 2833; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: 2834; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2835; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2836; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2837; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2838; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2839; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2840; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2841; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2842; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2843; GFX90A-TGSPLIT-NEXT: s_endpgm 2844; 2845; 2846 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2847entry: 2848 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2849 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire 2850 %val0 = extractvalue { i32, i1 } %val, 0 2851 store i32 %val0, i32 addrspace(3)* %out, align 4 2852 ret void 2853} 2854 2855define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg( 2856; GFX6-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: 2857; GFX6: ; %bb.0: ; %entry 2858; GFX6-NEXT: 
s_load_dword s2, s[4:5], 0x0 2859; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 2860; GFX6-NEXT: s_mov_b32 m0, -1 2861; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2862; GFX6-NEXT: v_mov_b32_e32 v0, s2 2863; GFX6-NEXT: v_mov_b32_e32 v1, s1 2864; GFX6-NEXT: v_mov_b32_e32 v2, s0 2865; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2866; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2867; GFX6-NEXT: ds_write_b32 v0, v1 2868; GFX6-NEXT: s_endpgm 2869; 2870; GFX7-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: 2871; GFX7: ; %bb.0: ; %entry 2872; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2873; GFX7-NEXT: s_mov_b32 m0, -1 2874; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2875; GFX7-NEXT: v_mov_b32_e32 v0, s0 2876; GFX7-NEXT: v_mov_b32_e32 v1, s2 2877; GFX7-NEXT: v_mov_b32_e32 v2, s1 2878; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2879; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2880; GFX7-NEXT: ds_write_b32 v0, v1 2881; GFX7-NEXT: s_endpgm 2882; 2883; GFX10-WGP-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: 2884; GFX10-WGP: ; %bb.0: ; %entry 2885; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2886; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2887; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2888; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 2889; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 2890; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2891; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2892; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2893; GFX10-WGP-NEXT: s_endpgm 2894; 2895; GFX10-CU-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: 2896; GFX10-CU: ; %bb.0: ; %entry 2897; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2898; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2899; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2900; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 2901; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 2902; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2903; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2904; GFX10-CU-NEXT: ds_write_b32 v0, v1 2905; GFX10-CU-NEXT: s_endpgm 2906; 2907; SKIP-CACHE-INV-LABEL: 
local_singlethread_seq_cst_seq_cst_ret_cmpxchg: 2908; SKIP-CACHE-INV: ; %bb.0: ; %entry 2909; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 2910; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 2911; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2912; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2913; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2914; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2915; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2916; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2917; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2918; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2919; SKIP-CACHE-INV-NEXT: s_endpgm 2920; 2921; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: 2922; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2923; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2924; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2925; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2926; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2927; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2928; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2929; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2930; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 2931; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2932; 2933; GFX90A-TGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: 2934; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2935; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2936; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2937; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 2938; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2939; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 2940; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 2941; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2942; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 2943; GFX90A-TGSPLIT-NEXT: s_endpgm 2944; 2945; 2946 i32 addrspace(3)* %out, i32 %in, i32 %old) { 2947entry: 2948 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 2949 %val = cmpxchg volatile i32 
addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst 2950 %val0 = extractvalue { i32, i1 } %val, 0 2951 store i32 %val0, i32 addrspace(3)* %out, align 4 2952 ret void 2953} 2954 2955define amdgpu_kernel void @local_singlethread_one_as_unordered_load( 2956; GFX6-LABEL: local_singlethread_one_as_unordered_load: 2957; GFX6: ; %bb.0: ; %entry 2958; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 2959; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 2960; GFX6-NEXT: s_mov_b32 m0, -1 2961; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2962; GFX6-NEXT: v_mov_b32_e32 v0, s0 2963; GFX6-NEXT: ds_read_b32 v0, v0 2964; GFX6-NEXT: v_mov_b32_e32 v1, s1 2965; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2966; GFX6-NEXT: ds_write_b32 v1, v0 2967; GFX6-NEXT: s_endpgm 2968; 2969; GFX7-LABEL: local_singlethread_one_as_unordered_load: 2970; GFX7: ; %bb.0: ; %entry 2971; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2972; GFX7-NEXT: s_mov_b32 m0, -1 2973; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2974; GFX7-NEXT: v_mov_b32_e32 v0, s0 2975; GFX7-NEXT: ds_read_b32 v0, v0 2976; GFX7-NEXT: v_mov_b32_e32 v1, s1 2977; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2978; GFX7-NEXT: ds_write_b32 v1, v0 2979; GFX7-NEXT: s_endpgm 2980; 2981; GFX10-WGP-LABEL: local_singlethread_one_as_unordered_load: 2982; GFX10-WGP: ; %bb.0: ; %entry 2983; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2984; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2985; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2986; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2987; GFX10-WGP-NEXT: ds_read_b32 v0, v0 2988; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2989; GFX10-WGP-NEXT: ds_write_b32 v1, v0 2990; GFX10-WGP-NEXT: s_endpgm 2991; 2992; GFX10-CU-LABEL: local_singlethread_one_as_unordered_load: 2993; GFX10-CU: ; %bb.0: ; %entry 2994; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2995; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2996; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2997; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2998; GFX10-CU-NEXT: ds_read_b32 v0, v0 2999; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 
3000; GFX10-CU-NEXT: ds_write_b32 v1, v0 3001; GFX10-CU-NEXT: s_endpgm 3002; 3003; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_load: 3004; SKIP-CACHE-INV: ; %bb.0: ; %entry 3005; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3006; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3007; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3008; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3009; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 3010; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3011; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3012; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 3013; SKIP-CACHE-INV-NEXT: s_endpgm 3014; 3015; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_unordered_load: 3016; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3017; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3018; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3019; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3020; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 3021; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3022; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3023; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0 3024; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3025; 3026; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_unordered_load: 3027; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3028; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3029; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3030; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3031; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0 3032; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3033; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3034; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0 3035; GFX90A-TGSPLIT-NEXT: s_endpgm 3036; 3037; 3038 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 3039entry: 3040 %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") unordered, align 4 3041 store i32 %val, i32 addrspace(3)* %out 3042 ret void 3043} 3044 3045define amdgpu_kernel void @local_singlethread_one_as_monotonic_load( 3046; GFX6-LABEL: 
local_singlethread_one_as_monotonic_load: 3047; GFX6: ; %bb.0: ; %entry 3048; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3049; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3050; GFX6-NEXT: s_mov_b32 m0, -1 3051; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3052; GFX6-NEXT: v_mov_b32_e32 v0, s0 3053; GFX6-NEXT: ds_read_b32 v0, v0 3054; GFX6-NEXT: v_mov_b32_e32 v1, s1 3055; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3056; GFX6-NEXT: ds_write_b32 v1, v0 3057; GFX6-NEXT: s_endpgm 3058; 3059; GFX7-LABEL: local_singlethread_one_as_monotonic_load: 3060; GFX7: ; %bb.0: ; %entry 3061; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3062; GFX7-NEXT: s_mov_b32 m0, -1 3063; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3064; GFX7-NEXT: v_mov_b32_e32 v0, s0 3065; GFX7-NEXT: ds_read_b32 v0, v0 3066; GFX7-NEXT: v_mov_b32_e32 v1, s1 3067; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3068; GFX7-NEXT: ds_write_b32 v1, v0 3069; GFX7-NEXT: s_endpgm 3070; 3071; GFX10-WGP-LABEL: local_singlethread_one_as_monotonic_load: 3072; GFX10-WGP: ; %bb.0: ; %entry 3073; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3074; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3075; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3076; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3077; GFX10-WGP-NEXT: ds_read_b32 v0, v0 3078; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3079; GFX10-WGP-NEXT: ds_write_b32 v1, v0 3080; GFX10-WGP-NEXT: s_endpgm 3081; 3082; GFX10-CU-LABEL: local_singlethread_one_as_monotonic_load: 3083; GFX10-CU: ; %bb.0: ; %entry 3084; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3085; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3086; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3087; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3088; GFX10-CU-NEXT: ds_read_b32 v0, v0 3089; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3090; GFX10-CU-NEXT: ds_write_b32 v1, v0 3091; GFX10-CU-NEXT: s_endpgm 3092; 3093; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_load: 3094; SKIP-CACHE-INV: ; %bb.0: ; %entry 3095; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3096; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3097; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3098; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3099; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 3100; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3101; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3102; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 3103; SKIP-CACHE-INV-NEXT: s_endpgm 3104; 3105; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_load: 3106; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3107; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3108; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3109; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3110; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 3111; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3112; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3113; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0 3114; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3115; 3116; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_load: 3117; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3118; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3119; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3120; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3121; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0 3122; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3123; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3124; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0 3125; GFX90A-TGSPLIT-NEXT: s_endpgm 3126; 3127; 3128 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 3129entry: 3130 %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") monotonic, align 4 3131 store i32 %val, i32 addrspace(3)* %out 3132 ret void 3133} 3134 3135define amdgpu_kernel void @local_singlethread_one_as_acquire_load( 3136; GFX6-LABEL: local_singlethread_one_as_acquire_load: 3137; GFX6: ; %bb.0: ; %entry 3138; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3139; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3140; GFX6-NEXT: s_mov_b32 m0, -1 3141; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3142; GFX6-NEXT: v_mov_b32_e32 v0, s0 3143; GFX6-NEXT: ds_read_b32 v0, v0 3144; 
GFX6-NEXT: v_mov_b32_e32 v1, s1 3145; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3146; GFX6-NEXT: ds_write_b32 v1, v0 3147; GFX6-NEXT: s_endpgm 3148; 3149; GFX7-LABEL: local_singlethread_one_as_acquire_load: 3150; GFX7: ; %bb.0: ; %entry 3151; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3152; GFX7-NEXT: s_mov_b32 m0, -1 3153; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3154; GFX7-NEXT: v_mov_b32_e32 v0, s0 3155; GFX7-NEXT: ds_read_b32 v0, v0 3156; GFX7-NEXT: v_mov_b32_e32 v1, s1 3157; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3158; GFX7-NEXT: ds_write_b32 v1, v0 3159; GFX7-NEXT: s_endpgm 3160; 3161; GFX10-WGP-LABEL: local_singlethread_one_as_acquire_load: 3162; GFX10-WGP: ; %bb.0: ; %entry 3163; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3164; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3165; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3166; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3167; GFX10-WGP-NEXT: ds_read_b32 v0, v0 3168; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3169; GFX10-WGP-NEXT: ds_write_b32 v1, v0 3170; GFX10-WGP-NEXT: s_endpgm 3171; 3172; GFX10-CU-LABEL: local_singlethread_one_as_acquire_load: 3173; GFX10-CU: ; %bb.0: ; %entry 3174; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3175; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3176; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3177; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3178; GFX10-CU-NEXT: ds_read_b32 v0, v0 3179; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3180; GFX10-CU-NEXT: ds_write_b32 v1, v0 3181; GFX10-CU-NEXT: s_endpgm 3182; 3183; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_load: 3184; SKIP-CACHE-INV: ; %bb.0: ; %entry 3185; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3186; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3187; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3188; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3189; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 3190; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3191; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3192; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 3193; SKIP-CACHE-INV-NEXT: s_endpgm 3194; 
3195; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_load: 3196; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3197; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3198; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3199; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3200; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 3201; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3202; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3203; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0 3204; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3205; 3206; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acquire_load: 3207; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3208; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3209; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3210; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3211; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0 3212; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3213; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3214; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0 3215; GFX90A-TGSPLIT-NEXT: s_endpgm 3216; 3217; 3218 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 3219entry: 3220 %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") acquire, align 4 3221 store i32 %val, i32 addrspace(3)* %out 3222 ret void 3223} 3224 3225define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load( 3226; GFX6-LABEL: local_singlethread_one_as_seq_cst_load: 3227; GFX6: ; %bb.0: ; %entry 3228; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3229; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3230; GFX6-NEXT: s_mov_b32 m0, -1 3231; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3232; GFX6-NEXT: v_mov_b32_e32 v0, s0 3233; GFX6-NEXT: ds_read_b32 v0, v0 3234; GFX6-NEXT: v_mov_b32_e32 v1, s1 3235; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3236; GFX6-NEXT: ds_write_b32 v1, v0 3237; GFX6-NEXT: s_endpgm 3238; 3239; GFX7-LABEL: local_singlethread_one_as_seq_cst_load: 3240; GFX7: ; %bb.0: ; %entry 3241; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3242; GFX7-NEXT: s_mov_b32 m0, -1 3243; GFX7-NEXT: 
s_waitcnt lgkmcnt(0) 3244; GFX7-NEXT: v_mov_b32_e32 v0, s0 3245; GFX7-NEXT: ds_read_b32 v0, v0 3246; GFX7-NEXT: v_mov_b32_e32 v1, s1 3247; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3248; GFX7-NEXT: ds_write_b32 v1, v0 3249; GFX7-NEXT: s_endpgm 3250; 3251; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_load: 3252; GFX10-WGP: ; %bb.0: ; %entry 3253; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3254; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3255; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3256; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3257; GFX10-WGP-NEXT: ds_read_b32 v0, v0 3258; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3259; GFX10-WGP-NEXT: ds_write_b32 v1, v0 3260; GFX10-WGP-NEXT: s_endpgm 3261; 3262; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_load: 3263; GFX10-CU: ; %bb.0: ; %entry 3264; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3265; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3266; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3267; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3268; GFX10-CU-NEXT: ds_read_b32 v0, v0 3269; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3270; GFX10-CU-NEXT: ds_write_b32 v1, v0 3271; GFX10-CU-NEXT: s_endpgm 3272; 3273; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_load: 3274; SKIP-CACHE-INV: ; %bb.0: ; %entry 3275; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3276; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3277; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3278; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3279; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 3280; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3281; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3282; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 3283; SKIP-CACHE-INV-NEXT: s_endpgm 3284; 3285; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_load: 3286; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3287; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3288; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3289; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3290; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 
3291; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3292; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3293; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v1, v0 3294; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3295; 3296; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_load: 3297; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3298; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3299; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3300; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3301; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0 3302; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3303; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3304; GFX90A-TGSPLIT-NEXT: ds_write_b32 v1, v0 3305; GFX90A-TGSPLIT-NEXT: s_endpgm 3306; 3307; 3308 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 3309entry: 3310 %val = load atomic i32, i32 addrspace(3)* %in syncscope("singlethread-one-as") seq_cst, align 4 3311 store i32 %val, i32 addrspace(3)* %out 3312 ret void 3313} 3314; unordered atomic store of %in to %out (addrspace(3)) at syncscope("singlethread-one-as"). Each prefix below expects a single ds_write_b32 immediately followed by s_endpgm. 3315define amdgpu_kernel void @local_singlethread_one_as_unordered_store( 3316; GFX6-LABEL: local_singlethread_one_as_unordered_store: 3317; GFX6: ; %bb.0: ; %entry 3318; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3319; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3320; GFX6-NEXT: s_mov_b32 m0, -1 3321; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3322; GFX6-NEXT: v_mov_b32_e32 v1, s0 3323; GFX6-NEXT: v_mov_b32_e32 v0, s1 3324; GFX6-NEXT: ds_write_b32 v0, v1 3325; GFX6-NEXT: s_endpgm 3326; 3327; GFX7-LABEL: local_singlethread_one_as_unordered_store: 3328; GFX7: ; %bb.0: ; %entry 3329; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3330; GFX7-NEXT: s_mov_b32 m0, -1 3331; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3332; GFX7-NEXT: v_mov_b32_e32 v0, s1 3333; GFX7-NEXT: v_mov_b32_e32 v1, s0 3334; GFX7-NEXT: ds_write_b32 v0, v1 3335; GFX7-NEXT: s_endpgm 3336; 3337; GFX10-WGP-LABEL: local_singlethread_one_as_unordered_store: 3338; GFX10-WGP: ; %bb.0: ; %entry 3339; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3340; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3341; GFX10-WGP-NEXT:
v_mov_b32_e32 v0, s1 3342; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 3343; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3344; GFX10-WGP-NEXT: s_endpgm 3345; 3346; GFX10-CU-LABEL: local_singlethread_one_as_unordered_store: 3347; GFX10-CU: ; %bb.0: ; %entry 3348; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3349; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3350; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 3351; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 3352; GFX10-CU-NEXT: ds_write_b32 v0, v1 3353; GFX10-CU-NEXT: s_endpgm 3354; 3355; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_store: 3356; SKIP-CACHE-INV: ; %bb.0: ; %entry 3357; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3358; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3359; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3360; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 3361; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3362; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3363; SKIP-CACHE-INV-NEXT: s_endpgm 3364; 3365; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_unordered_store: 3366; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3367; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3368; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3369; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3370; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3371; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 3372; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3373; 3374; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_unordered_store: 3375; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3376; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3377; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3378; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3379; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3380; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 3381; GFX90A-TGSPLIT-NEXT: s_endpgm 3382; 3383; 3384 i32 %in, i32 addrspace(3)* %out) { 3385entry: 3386 store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") unordered, align 4 3387 ret void 3388} 3389; monotonic atomic store of %in to %out (addrspace(3)) at syncscope("singlethread-one-as"). Each prefix below expects a single ds_write_b32 immediately followed by s_endpgm. 3390define amdgpu_kernel
void @local_singlethread_one_as_monotonic_store( 3391; GFX6-LABEL: local_singlethread_one_as_monotonic_store: 3392; GFX6: ; %bb.0: ; %entry 3393; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3394; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3395; GFX6-NEXT: s_mov_b32 m0, -1 3396; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3397; GFX6-NEXT: v_mov_b32_e32 v1, s0 3398; GFX6-NEXT: v_mov_b32_e32 v0, s1 3399; GFX6-NEXT: ds_write_b32 v0, v1 3400; GFX6-NEXT: s_endpgm 3401; 3402; GFX7-LABEL: local_singlethread_one_as_monotonic_store: 3403; GFX7: ; %bb.0: ; %entry 3404; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3405; GFX7-NEXT: s_mov_b32 m0, -1 3406; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3407; GFX7-NEXT: v_mov_b32_e32 v0, s1 3408; GFX7-NEXT: v_mov_b32_e32 v1, s0 3409; GFX7-NEXT: ds_write_b32 v0, v1 3410; GFX7-NEXT: s_endpgm 3411; 3412; GFX10-WGP-LABEL: local_singlethread_one_as_monotonic_store: 3413; GFX10-WGP: ; %bb.0: ; %entry 3414; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3415; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3416; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 3417; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 3418; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3419; GFX10-WGP-NEXT: s_endpgm 3420; 3421; GFX10-CU-LABEL: local_singlethread_one_as_monotonic_store: 3422; GFX10-CU: ; %bb.0: ; %entry 3423; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3424; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3425; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 3426; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 3427; GFX10-CU-NEXT: ds_write_b32 v0, v1 3428; GFX10-CU-NEXT: s_endpgm 3429; 3430; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_store: 3431; SKIP-CACHE-INV: ; %bb.0: ; %entry 3432; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3433; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3434; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3435; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 3436; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3437; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3438; SKIP-CACHE-INV-NEXT: s_endpgm 3439; 3440; 
GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_store: 3441; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3442; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3443; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3444; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3445; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3446; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 3447; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3448; 3449; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_store: 3450; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3451; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3452; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3453; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3454; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3455; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 3456; GFX90A-TGSPLIT-NEXT: s_endpgm 3457; 3458; 3459 i32 %in, i32 addrspace(3)* %out) { 3460entry: 3461 store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") monotonic, align 4 3462 ret void 3463} 3464; release atomic store of %in to %out (addrspace(3)) at syncscope("singlethread-one-as"). Each prefix below expects a single ds_write_b32 immediately followed by s_endpgm. 3465define amdgpu_kernel void @local_singlethread_one_as_release_store( 3466; GFX6-LABEL: local_singlethread_one_as_release_store: 3467; GFX6: ; %bb.0: ; %entry 3468; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3469; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3470; GFX6-NEXT: s_mov_b32 m0, -1 3471; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3472; GFX6-NEXT: v_mov_b32_e32 v1, s0 3473; GFX6-NEXT: v_mov_b32_e32 v0, s1 3474; GFX6-NEXT: ds_write_b32 v0, v1 3475; GFX6-NEXT: s_endpgm 3476; 3477; GFX7-LABEL: local_singlethread_one_as_release_store: 3478; GFX7: ; %bb.0: ; %entry 3479; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3480; GFX7-NEXT: s_mov_b32 m0, -1 3481; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3482; GFX7-NEXT: v_mov_b32_e32 v0, s1 3483; GFX7-NEXT: v_mov_b32_e32 v1, s0 3484; GFX7-NEXT: ds_write_b32 v0, v1 3485; GFX7-NEXT: s_endpgm 3486; 3487; GFX10-WGP-LABEL: local_singlethread_one_as_release_store: 3488; GFX10-WGP: ; %bb.0: ; %entry 3489; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
3490; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3491; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 3492; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 3493; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3494; GFX10-WGP-NEXT: s_endpgm 3495; 3496; GFX10-CU-LABEL: local_singlethread_one_as_release_store: 3497; GFX10-CU: ; %bb.0: ; %entry 3498; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3499; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3500; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 3501; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 3502; GFX10-CU-NEXT: ds_write_b32 v0, v1 3503; GFX10-CU-NEXT: s_endpgm 3504; 3505; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_store: 3506; SKIP-CACHE-INV: ; %bb.0: ; %entry 3507; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3508; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3509; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3510; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 3511; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3512; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3513; SKIP-CACHE-INV-NEXT: s_endpgm 3514; 3515; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_store: 3516; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3517; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3518; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3519; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3520; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3521; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 3522; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3523; 3524; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_release_store: 3525; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3526; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3527; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3528; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3529; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3530; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 3531; GFX90A-TGSPLIT-NEXT: s_endpgm 3532; 3533; 3534 i32 %in, i32 addrspace(3)* %out) { 3535entry: 3536 store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") release, align 
4 3537 ret void 3538} 3539; seq_cst atomic store of %in to %out (addrspace(3)) at syncscope("singlethread-one-as"). Each prefix below expects a single ds_write_b32 immediately followed by s_endpgm. 3540define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store( 3541; GFX6-LABEL: local_singlethread_one_as_seq_cst_store: 3542; GFX6: ; %bb.0: ; %entry 3543; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3544; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3545; GFX6-NEXT: s_mov_b32 m0, -1 3546; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3547; GFX6-NEXT: v_mov_b32_e32 v1, s0 3548; GFX6-NEXT: v_mov_b32_e32 v0, s1 3549; GFX6-NEXT: ds_write_b32 v0, v1 3550; GFX6-NEXT: s_endpgm 3551; 3552; GFX7-LABEL: local_singlethread_one_as_seq_cst_store: 3553; GFX7: ; %bb.0: ; %entry 3554; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3555; GFX7-NEXT: s_mov_b32 m0, -1 3556; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3557; GFX7-NEXT: v_mov_b32_e32 v0, s1 3558; GFX7-NEXT: v_mov_b32_e32 v1, s0 3559; GFX7-NEXT: ds_write_b32 v0, v1 3560; GFX7-NEXT: s_endpgm 3561; 3562; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_store: 3563; GFX10-WGP: ; %bb.0: ; %entry 3564; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3565; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3566; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 3567; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 3568; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3569; GFX10-WGP-NEXT: s_endpgm 3570; 3571; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_store: 3572; GFX10-CU: ; %bb.0: ; %entry 3573; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3574; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3575; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 3576; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 3577; GFX10-CU-NEXT: ds_write_b32 v0, v1 3578; GFX10-CU-NEXT: s_endpgm 3579; 3580; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_store: 3581; SKIP-CACHE-INV: ; %bb.0: ; %entry 3582; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3583; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3584; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3585; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 3586; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3587; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3588;
SKIP-CACHE-INV-NEXT: s_endpgm 3589; 3590; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_store: 3591; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3592; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3593; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3594; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3595; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3596; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 3597; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3598; 3599; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_store: 3600; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3601; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3602; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3603; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 3604; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 3605; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 3606; GFX90A-TGSPLIT-NEXT: s_endpgm 3607; 3608; 3609 i32 %in, i32 addrspace(3)* %out) { 3610entry: 3611 store atomic i32 %in, i32 addrspace(3)* %out syncscope("singlethread-one-as") seq_cst, align 4 3612 ret void 3613} 3614; volatile atomicrmw xchg of %in into %out (addrspace(3)) at syncscope("singlethread-one-as") with monotonic ordering; the returned value is unused. Each prefix below expects ds_wrxchg_rtn_b32 immediately followed by s_endpgm. 3615define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw( 3616; GFX6-LABEL: local_singlethread_one_as_monotonic_atomicrmw: 3617; GFX6: ; %bb.0: ; %entry 3618; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3619; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3620; GFX6-NEXT: s_mov_b32 m0, -1 3621; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3622; GFX6-NEXT: v_mov_b32_e32 v0, s0 3623; GFX6-NEXT: v_mov_b32_e32 v1, s1 3624; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3625; GFX6-NEXT: s_endpgm 3626; 3627; GFX7-LABEL: local_singlethread_one_as_monotonic_atomicrmw: 3628; GFX7: ; %bb.0: ; %entry 3629; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3630; GFX7-NEXT: s_mov_b32 m0, -1 3631; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3632; GFX7-NEXT: v_mov_b32_e32 v0, s0 3633; GFX7-NEXT: v_mov_b32_e32 v1, s1 3634; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3635; GFX7-NEXT: s_endpgm 3636; 3637; GFX10-WGP-LABEL: local_singlethread_one_as_monotonic_atomicrmw: 3638;
GFX10-WGP: ; %bb.0: ; %entry 3639; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3640; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3641; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3642; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3643; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3644; GFX10-WGP-NEXT: s_endpgm 3645; 3646; GFX10-CU-LABEL: local_singlethread_one_as_monotonic_atomicrmw: 3647; GFX10-CU: ; %bb.0: ; %entry 3648; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3649; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3650; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3651; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3652; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3653; GFX10-CU-NEXT: s_endpgm 3654; 3655; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_atomicrmw: 3656; SKIP-CACHE-INV: ; %bb.0: ; %entry 3657; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3658; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3659; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3660; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3661; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3662; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3663; SKIP-CACHE-INV-NEXT: s_endpgm 3664; 3665; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_atomicrmw: 3666; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3667; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3668; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3669; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3670; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3671; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3672; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3673; 3674; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_atomicrmw: 3675; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3676; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3677; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3678; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3679; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3680; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3681; GFX90A-TGSPLIT-NEXT: s_endpgm 3682; 
3683; 3684 i32 addrspace(3)* %out, i32 %in) { 3685entry: 3686 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") monotonic 3687 ret void 3688} 3689; volatile atomicrmw xchg of %in into %out (addrspace(3)) at syncscope("singlethread-one-as") with acquire ordering; the returned value is unused. Each prefix below expects ds_wrxchg_rtn_b32 immediately followed by s_endpgm. 3690define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw( 3691; GFX6-LABEL: local_singlethread_one_as_acquire_atomicrmw: 3692; GFX6: ; %bb.0: ; %entry 3693; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3694; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3695; GFX6-NEXT: s_mov_b32 m0, -1 3696; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3697; GFX6-NEXT: v_mov_b32_e32 v0, s0 3698; GFX6-NEXT: v_mov_b32_e32 v1, s1 3699; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3700; GFX6-NEXT: s_endpgm 3701; 3702; GFX7-LABEL: local_singlethread_one_as_acquire_atomicrmw: 3703; GFX7: ; %bb.0: ; %entry 3704; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3705; GFX7-NEXT: s_mov_b32 m0, -1 3706; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3707; GFX7-NEXT: v_mov_b32_e32 v0, s0 3708; GFX7-NEXT: v_mov_b32_e32 v1, s1 3709; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3710; GFX7-NEXT: s_endpgm 3711; 3712; GFX10-WGP-LABEL: local_singlethread_one_as_acquire_atomicrmw: 3713; GFX10-WGP: ; %bb.0: ; %entry 3714; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3715; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3716; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3717; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3718; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3719; GFX10-WGP-NEXT: s_endpgm 3720; 3721; GFX10-CU-LABEL: local_singlethread_one_as_acquire_atomicrmw: 3722; GFX10-CU: ; %bb.0: ; %entry 3723; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3724; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3725; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3726; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3727; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3728; GFX10-CU-NEXT: s_endpgm 3729; 3730; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_atomicrmw: 3731; SKIP-CACHE-INV: ; %bb.0: ; %entry 3732; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3733; SKIP-CACHE-INV-NEXT:
s_mov_b32 m0, -1 3734; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3735; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3736; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3737; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3738; SKIP-CACHE-INV-NEXT: s_endpgm 3739; 3740; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_atomicrmw: 3741; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3742; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3743; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3744; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3745; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3746; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3747; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3748; 3749; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acquire_atomicrmw: 3750; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3751; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3752; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3753; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3754; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3755; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3756; GFX90A-TGSPLIT-NEXT: s_endpgm 3757; 3758; 3759 i32 addrspace(3)* %out, i32 %in) { 3760entry: 3761 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acquire 3762 ret void 3763} 3764; volatile atomicrmw xchg of %in into %out (addrspace(3)) at syncscope("singlethread-one-as") with release ordering; the returned value is unused. Each prefix below expects ds_wrxchg_rtn_b32 immediately followed by s_endpgm. 3765define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw( 3766; GFX6-LABEL: local_singlethread_one_as_release_atomicrmw: 3767; GFX6: ; %bb.0: ; %entry 3768; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3769; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3770; GFX6-NEXT: s_mov_b32 m0, -1 3771; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3772; GFX6-NEXT: v_mov_b32_e32 v0, s0 3773; GFX6-NEXT: v_mov_b32_e32 v1, s1 3774; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3775; GFX6-NEXT: s_endpgm 3776; 3777; GFX7-LABEL: local_singlethread_one_as_release_atomicrmw: 3778; GFX7: ; %bb.0: ; %entry 3779; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3780; GFX7-NEXT: s_mov_b32 m0, -1 3781; GFX7-NEXT:
s_waitcnt lgkmcnt(0) 3782; GFX7-NEXT: v_mov_b32_e32 v0, s0 3783; GFX7-NEXT: v_mov_b32_e32 v1, s1 3784; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3785; GFX7-NEXT: s_endpgm 3786; 3787; GFX10-WGP-LABEL: local_singlethread_one_as_release_atomicrmw: 3788; GFX10-WGP: ; %bb.0: ; %entry 3789; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3790; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3791; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3792; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3793; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3794; GFX10-WGP-NEXT: s_endpgm 3795; 3796; GFX10-CU-LABEL: local_singlethread_one_as_release_atomicrmw: 3797; GFX10-CU: ; %bb.0: ; %entry 3798; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3799; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3800; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3801; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3802; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3803; GFX10-CU-NEXT: s_endpgm 3804; 3805; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_atomicrmw: 3806; SKIP-CACHE-INV: ; %bb.0: ; %entry 3807; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3808; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3809; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3810; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3811; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3812; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3813; SKIP-CACHE-INV-NEXT: s_endpgm 3814; 3815; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_atomicrmw: 3816; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3817; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3818; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3819; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3820; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3821; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3822; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3823; 3824; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_release_atomicrmw: 3825; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3826; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 
3827; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3828; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3829; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3830; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3831; GFX90A-TGSPLIT-NEXT: s_endpgm 3832; 3833; 3834 i32 addrspace(3)* %out, i32 %in) { 3835entry: 3836 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") release 3837 ret void 3838} 3839; volatile atomicrmw xchg of %in into %out (addrspace(3)) at syncscope("singlethread-one-as") with acq_rel ordering; the returned value is unused. Each prefix below expects ds_wrxchg_rtn_b32 immediately followed by s_endpgm. 3840define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw( 3841; GFX6-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: 3842; GFX6: ; %bb.0: ; %entry 3843; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3844; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3845; GFX6-NEXT: s_mov_b32 m0, -1 3846; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3847; GFX6-NEXT: v_mov_b32_e32 v0, s0 3848; GFX6-NEXT: v_mov_b32_e32 v1, s1 3849; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3850; GFX6-NEXT: s_endpgm 3851; 3852; GFX7-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: 3853; GFX7: ; %bb.0: ; %entry 3854; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3855; GFX7-NEXT: s_mov_b32 m0, -1 3856; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3857; GFX7-NEXT: v_mov_b32_e32 v0, s0 3858; GFX7-NEXT: v_mov_b32_e32 v1, s1 3859; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3860; GFX7-NEXT: s_endpgm 3861; 3862; GFX10-WGP-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: 3863; GFX10-WGP: ; %bb.0: ; %entry 3864; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3865; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3866; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3867; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3868; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3869; GFX10-WGP-NEXT: s_endpgm 3870; 3871; GFX10-CU-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: 3872; GFX10-CU: ; %bb.0: ; %entry 3873; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3874; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3875; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3876; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3877; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0,
v1 3878; GFX10-CU-NEXT: s_endpgm 3879; 3880; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: 3881; SKIP-CACHE-INV: ; %bb.0: ; %entry 3882; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3883; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3884; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3885; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3886; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3887; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3888; SKIP-CACHE-INV-NEXT: s_endpgm 3889; 3890; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: 3891; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3892; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3893; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3894; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3895; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3896; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3897; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3898; 3899; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: 3900; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3901; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3902; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3903; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3904; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3905; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3906; GFX90A-TGSPLIT-NEXT: s_endpgm 3907; 3908; 3909 i32 addrspace(3)* %out, i32 %in) { 3910entry: 3911 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acq_rel 3912 ret void 3913} 3914; volatile atomicrmw xchg of %in into %out (addrspace(3)) at syncscope("singlethread-one-as") with seq_cst ordering; the returned value is unused. Each prefix below expects ds_wrxchg_rtn_b32 immediately followed by s_endpgm. 3915define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw( 3916; GFX6-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: 3917; GFX6: ; %bb.0: ; %entry 3918; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3919; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3920; GFX6-NEXT: s_mov_b32 m0, -1 3921; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3922; GFX6-NEXT: v_mov_b32_e32 v0, s0 3923; GFX6-NEXT: v_mov_b32_e32 v1, s1 3924; GFX6-NEXT: ds_wrxchg_rtn_b32 v0,
v0, v1 3925; GFX6-NEXT: s_endpgm 3926; 3927; GFX7-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: 3928; GFX7: ; %bb.0: ; %entry 3929; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3930; GFX7-NEXT: s_mov_b32 m0, -1 3931; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3932; GFX7-NEXT: v_mov_b32_e32 v0, s0 3933; GFX7-NEXT: v_mov_b32_e32 v1, s1 3934; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3935; GFX7-NEXT: s_endpgm 3936; 3937; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: 3938; GFX10-WGP: ; %bb.0: ; %entry 3939; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3940; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3941; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3942; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3943; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3944; GFX10-WGP-NEXT: s_endpgm 3945; 3946; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: 3947; GFX10-CU: ; %bb.0: ; %entry 3948; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3949; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3950; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3951; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3952; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3953; GFX10-CU-NEXT: s_endpgm 3954; 3955; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: 3956; SKIP-CACHE-INV: ; %bb.0: ; %entry 3957; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3958; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3959; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3960; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3961; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3962; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3963; SKIP-CACHE-INV-NEXT: s_endpgm 3964; 3965; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: 3966; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3967; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3968; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3969; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3970; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3971; GFX90A-NOTTGSPLIT-NEXT: 
ds_wrxchg_rtn_b32 v0, v0, v1 3972; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3973; 3974; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: 3975; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3976; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3977; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3978; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 3979; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 3980; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3981; GFX90A-TGSPLIT-NEXT: s_endpgm 3982; 3983; 3984 i32 addrspace(3)* %out, i32 %in) { 3985entry: 3986 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") seq_cst 3987 ret void 3988} 3989; volatile atomicrmw xchg of %in into %out (addrspace(3)) at syncscope("singlethread-one-as") with acquire ordering; the returned value is stored back to %out. Each prefix below expects ds_wrxchg_rtn_b32, s_waitcnt lgkmcnt(0), ds_write_b32. 3990define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw( 3991; GFX6-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: 3992; GFX6: ; %bb.0: ; %entry 3993; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 3994; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 3995; GFX6-NEXT: s_mov_b32 m0, -1 3996; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3997; GFX6-NEXT: v_mov_b32_e32 v0, s0 3998; GFX6-NEXT: v_mov_b32_e32 v1, s1 3999; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4000; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4001; GFX6-NEXT: ds_write_b32 v0, v1 4002; GFX6-NEXT: s_endpgm 4003; 4004; GFX7-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: 4005; GFX7: ; %bb.0: ; %entry 4006; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4007; GFX7-NEXT: s_mov_b32 m0, -1 4008; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4009; GFX7-NEXT: v_mov_b32_e32 v0, s0 4010; GFX7-NEXT: v_mov_b32_e32 v1, s1 4011; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4012; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4013; GFX7-NEXT: ds_write_b32 v0, v1 4014; GFX7-NEXT: s_endpgm 4015; 4016; GFX10-WGP-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: 4017; GFX10-WGP: ; %bb.0: ; %entry 4018; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4019; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4020; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4021; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
4022; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4023; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4024; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4025; GFX10-WGP-NEXT: s_endpgm 4026; 4027; GFX10-CU-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: 4028; GFX10-CU: ; %bb.0: ; %entry 4029; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4030; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4031; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4032; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4033; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4034; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4035; GFX10-CU-NEXT: ds_write_b32 v0, v1 4036; GFX10-CU-NEXT: s_endpgm 4037; 4038; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: 4039; SKIP-CACHE-INV: ; %bb.0: ; %entry 4040; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 4041; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4042; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4043; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4044; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4045; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4046; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4047; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4048; SKIP-CACHE-INV-NEXT: s_endpgm 4049; 4050; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: 4051; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4052; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4053; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4054; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4055; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 4056; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4057; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4058; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 4059; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4060; 4061; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: 4062; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4063; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4064; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4065; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, s0 4066; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 4067; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4068; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4069; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 4070; GFX90A-TGSPLIT-NEXT: s_endpgm 4071; 4072; 4073 i32 addrspace(3)* %out, i32 %in) { 4074entry: 4075 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acquire 4076 store i32 %val, i32 addrspace(3)* %out, align 4 4077 ret void 4078} 4079 4080define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw( 4081; GFX6-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: 4082; GFX6: ; %bb.0: ; %entry 4083; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 4084; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 4085; GFX6-NEXT: s_mov_b32 m0, -1 4086; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4087; GFX6-NEXT: v_mov_b32_e32 v0, s0 4088; GFX6-NEXT: v_mov_b32_e32 v1, s1 4089; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4090; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4091; GFX6-NEXT: ds_write_b32 v0, v1 4092; GFX6-NEXT: s_endpgm 4093; 4094; GFX7-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: 4095; GFX7: ; %bb.0: ; %entry 4096; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4097; GFX7-NEXT: s_mov_b32 m0, -1 4098; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4099; GFX7-NEXT: v_mov_b32_e32 v0, s0 4100; GFX7-NEXT: v_mov_b32_e32 v1, s1 4101; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4102; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4103; GFX7-NEXT: ds_write_b32 v0, v1 4104; GFX7-NEXT: s_endpgm 4105; 4106; GFX10-WGP-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: 4107; GFX10-WGP: ; %bb.0: ; %entry 4108; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4109; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4110; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4111; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4112; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4113; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4114; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4115; GFX10-WGP-NEXT: s_endpgm 4116; 4117; 
GFX10-CU-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: 4118; GFX10-CU: ; %bb.0: ; %entry 4119; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4120; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4121; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4122; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4123; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4124; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4125; GFX10-CU-NEXT: ds_write_b32 v0, v1 4126; GFX10-CU-NEXT: s_endpgm 4127; 4128; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: 4129; SKIP-CACHE-INV: ; %bb.0: ; %entry 4130; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 4131; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4132; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4133; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4134; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4135; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4136; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4137; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4138; SKIP-CACHE-INV-NEXT: s_endpgm 4139; 4140; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: 4141; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4142; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4143; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4144; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4145; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 4146; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4147; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4148; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 4149; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4150; 4151; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: 4152; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4153; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4154; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4155; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4156; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 4157; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4158; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4159; 
GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 4160; GFX90A-TGSPLIT-NEXT: s_endpgm 4161; 4162; 4163 i32 addrspace(3)* %out, i32 %in) { 4164entry: 4165 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") acq_rel 4166 store i32 %val, i32 addrspace(3)* %out, align 4 4167 ret void 4168} 4169 4170define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw( 4171; GFX6-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: 4172; GFX6: ; %bb.0: ; %entry 4173; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 4174; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 4175; GFX6-NEXT: s_mov_b32 m0, -1 4176; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4177; GFX6-NEXT: v_mov_b32_e32 v0, s0 4178; GFX6-NEXT: v_mov_b32_e32 v1, s1 4179; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4180; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4181; GFX6-NEXT: ds_write_b32 v0, v1 4182; GFX6-NEXT: s_endpgm 4183; 4184; GFX7-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: 4185; GFX7: ; %bb.0: ; %entry 4186; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4187; GFX7-NEXT: s_mov_b32 m0, -1 4188; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4189; GFX7-NEXT: v_mov_b32_e32 v0, s0 4190; GFX7-NEXT: v_mov_b32_e32 v1, s1 4191; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4192; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4193; GFX7-NEXT: ds_write_b32 v0, v1 4194; GFX7-NEXT: s_endpgm 4195; 4196; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: 4197; GFX10-WGP: ; %bb.0: ; %entry 4198; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4199; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4200; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4201; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4202; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4203; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4204; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4205; GFX10-WGP-NEXT: s_endpgm 4206; 4207; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: 4208; GFX10-CU: ; %bb.0: ; %entry 4209; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4210; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) 4211; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4212; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4213; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4214; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4215; GFX10-CU-NEXT: ds_write_b32 v0, v1 4216; GFX10-CU-NEXT: s_endpgm 4217; 4218; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: 4219; SKIP-CACHE-INV: ; %bb.0: ; %entry 4220; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 4221; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4222; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4223; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4224; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4225; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4226; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4227; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4228; SKIP-CACHE-INV-NEXT: s_endpgm 4229; 4230; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: 4231; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4232; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4233; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4234; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4235; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 4236; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4237; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4238; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 4239; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4240; 4241; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: 4242; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4243; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4244; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4245; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4246; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 4247; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 4248; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4249; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 4250; GFX90A-TGSPLIT-NEXT: s_endpgm 4251; 4252; 4253 i32 addrspace(3)* %out, i32 %in) { 4254entry: 4255 %val = atomicrmw volatile xchg i32 
addrspace(3)* %out, i32 %in syncscope("singlethread-one-as") seq_cst 4256 store i32 %val, i32 addrspace(3)* %out, align 4 4257 ret void 4258} 4259 4260define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg( 4261; GFX6-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: 4262; GFX6: ; %bb.0: ; %entry 4263; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 4264; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 4265; GFX6-NEXT: s_mov_b32 m0, -1 4266; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4267; GFX6-NEXT: v_mov_b32_e32 v0, s2 4268; GFX6-NEXT: v_mov_b32_e32 v1, s1 4269; GFX6-NEXT: v_mov_b32_e32 v2, s0 4270; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4271; GFX6-NEXT: s_endpgm 4272; 4273; GFX7-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: 4274; GFX7: ; %bb.0: ; %entry 4275; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4276; GFX7-NEXT: s_mov_b32 m0, -1 4277; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4278; GFX7-NEXT: v_mov_b32_e32 v0, s0 4279; GFX7-NEXT: v_mov_b32_e32 v1, s2 4280; GFX7-NEXT: v_mov_b32_e32 v2, s1 4281; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4282; GFX7-NEXT: s_endpgm 4283; 4284; GFX10-WGP-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: 4285; GFX10-WGP: ; %bb.0: ; %entry 4286; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4287; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4288; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4289; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4290; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4291; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4292; GFX10-WGP-NEXT: s_endpgm 4293; 4294; GFX10-CU-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: 4295; GFX10-CU: ; %bb.0: ; %entry 4296; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4297; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4298; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4299; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4300; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4301; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4302; GFX10-CU-NEXT: s_endpgm 4303; 4304; 
SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: 4305; SKIP-CACHE-INV: ; %bb.0: ; %entry 4306; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4307; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4308; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4309; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4310; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4311; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4312; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4313; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4314; SKIP-CACHE-INV-NEXT: s_endpgm 4315; 4316; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: 4317; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4318; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4319; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4320; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4321; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4322; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4323; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4324; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4325; 4326; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: 4327; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4328; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4329; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4330; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4331; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4332; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4333; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4334; GFX90A-TGSPLIT-NEXT: s_endpgm 4335; 4336; 4337 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4338entry: 4339 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4340 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic 4341 ret void 4342} 4343 4344define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg( 4345; GFX6-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: 4346; 
GFX6: ; %bb.0: ; %entry 4347; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 4348; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 4349; GFX6-NEXT: s_mov_b32 m0, -1 4350; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4351; GFX6-NEXT: v_mov_b32_e32 v0, s2 4352; GFX6-NEXT: v_mov_b32_e32 v1, s1 4353; GFX6-NEXT: v_mov_b32_e32 v2, s0 4354; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4355; GFX6-NEXT: s_endpgm 4356; 4357; GFX7-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: 4358; GFX7: ; %bb.0: ; %entry 4359; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4360; GFX7-NEXT: s_mov_b32 m0, -1 4361; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4362; GFX7-NEXT: v_mov_b32_e32 v0, s0 4363; GFX7-NEXT: v_mov_b32_e32 v1, s2 4364; GFX7-NEXT: v_mov_b32_e32 v2, s1 4365; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4366; GFX7-NEXT: s_endpgm 4367; 4368; GFX10-WGP-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: 4369; GFX10-WGP: ; %bb.0: ; %entry 4370; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4371; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4372; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4373; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4374; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4375; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4376; GFX10-WGP-NEXT: s_endpgm 4377; 4378; GFX10-CU-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: 4379; GFX10-CU: ; %bb.0: ; %entry 4380; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4381; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4382; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4383; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4384; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4385; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4386; GFX10-CU-NEXT: s_endpgm 4387; 4388; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: 4389; SKIP-CACHE-INV: ; %bb.0: ; %entry 4390; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4391; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4392; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4393; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) 4394; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4395; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4396; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4397; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4398; SKIP-CACHE-INV-NEXT: s_endpgm 4399; 4400; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: 4401; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4402; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4403; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4404; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4405; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4406; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4407; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4408; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4409; 4410; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: 4411; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4412; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4413; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4414; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4415; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4416; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4417; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4418; GFX90A-TGSPLIT-NEXT: s_endpgm 4419; 4420; 4421 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4422entry: 4423 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4424 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic 4425 ret void 4426} 4427 4428define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg( 4429; GFX6-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: 4430; GFX6: ; %bb.0: ; %entry 4431; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 4432; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 4433; GFX6-NEXT: s_mov_b32 m0, -1 4434; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4435; GFX6-NEXT: v_mov_b32_e32 v0, s2 4436; GFX6-NEXT: v_mov_b32_e32 v1, s1 4437; GFX6-NEXT: v_mov_b32_e32 v2, s0 4438; 
GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4439; GFX6-NEXT: s_endpgm 4440; 4441; GFX7-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: 4442; GFX7: ; %bb.0: ; %entry 4443; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4444; GFX7-NEXT: s_mov_b32 m0, -1 4445; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4446; GFX7-NEXT: v_mov_b32_e32 v0, s0 4447; GFX7-NEXT: v_mov_b32_e32 v1, s2 4448; GFX7-NEXT: v_mov_b32_e32 v2, s1 4449; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4450; GFX7-NEXT: s_endpgm 4451; 4452; GFX10-WGP-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: 4453; GFX10-WGP: ; %bb.0: ; %entry 4454; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4455; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4456; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4457; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4458; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4459; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4460; GFX10-WGP-NEXT: s_endpgm 4461; 4462; GFX10-CU-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: 4463; GFX10-CU: ; %bb.0: ; %entry 4464; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4465; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4466; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4467; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4468; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4469; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4470; GFX10-CU-NEXT: s_endpgm 4471; 4472; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: 4473; SKIP-CACHE-INV: ; %bb.0: ; %entry 4474; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4475; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4476; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4477; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4478; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4479; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4480; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4481; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4482; SKIP-CACHE-INV-NEXT: s_endpgm 4483; 4484; GFX90A-NOTTGSPLIT-LABEL: 
local_singlethread_one_as_release_monotonic_cmpxchg: 4485; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4486; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4487; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4488; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4489; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4490; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4491; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4492; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4493; 4494; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: 4495; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4496; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4497; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4498; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4499; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4500; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4501; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4502; GFX90A-TGSPLIT-NEXT: s_endpgm 4503; 4504; 4505 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4506entry: 4507 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4508 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic 4509 ret void 4510} 4511 4512define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg( 4513; GFX6-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: 4514; GFX6: ; %bb.0: ; %entry 4515; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 4516; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 4517; GFX6-NEXT: s_mov_b32 m0, -1 4518; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4519; GFX6-NEXT: v_mov_b32_e32 v0, s2 4520; GFX6-NEXT: v_mov_b32_e32 v1, s1 4521; GFX6-NEXT: v_mov_b32_e32 v2, s0 4522; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4523; GFX6-NEXT: s_endpgm 4524; 4525; GFX7-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: 4526; GFX7: ; %bb.0: ; %entry 4527; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4528; GFX7-NEXT: s_mov_b32 m0, -1 4529; GFX7-NEXT: s_waitcnt 
lgkmcnt(0) 4530; GFX7-NEXT: v_mov_b32_e32 v0, s0 4531; GFX7-NEXT: v_mov_b32_e32 v1, s2 4532; GFX7-NEXT: v_mov_b32_e32 v2, s1 4533; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4534; GFX7-NEXT: s_endpgm 4535; 4536; GFX10-WGP-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: 4537; GFX10-WGP: ; %bb.0: ; %entry 4538; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4539; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4540; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4541; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4542; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4543; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4544; GFX10-WGP-NEXT: s_endpgm 4545; 4546; GFX10-CU-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: 4547; GFX10-CU: ; %bb.0: ; %entry 4548; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4549; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4550; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4551; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4552; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4553; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4554; GFX10-CU-NEXT: s_endpgm 4555; 4556; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: 4557; SKIP-CACHE-INV: ; %bb.0: ; %entry 4558; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4559; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4560; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4561; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4562; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4563; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4564; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4565; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4566; SKIP-CACHE-INV-NEXT: s_endpgm 4567; 4568; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: 4569; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4570; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4571; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4572; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4573; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4574; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4575; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4576; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4577; 4578; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: 4579; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4580; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4581; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4582; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4583; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4584; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4585; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4586; GFX90A-TGSPLIT-NEXT: s_endpgm 4587; 4588; 4589 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4590entry: 4591 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4592 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic 4593 ret void 4594} 4595 4596define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg( 4597; GFX6-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: 4598; GFX6: ; %bb.0: ; %entry 4599; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 4600; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 4601; GFX6-NEXT: s_mov_b32 m0, -1 4602; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4603; GFX6-NEXT: v_mov_b32_e32 v0, s2 4604; GFX6-NEXT: v_mov_b32_e32 v1, s1 4605; GFX6-NEXT: v_mov_b32_e32 v2, s0 4606; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4607; GFX6-NEXT: s_endpgm 4608; 4609; GFX7-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: 4610; GFX7: ; %bb.0: ; %entry 4611; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4612; GFX7-NEXT: s_mov_b32 m0, -1 4613; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4614; GFX7-NEXT: v_mov_b32_e32 v0, s0 4615; GFX7-NEXT: v_mov_b32_e32 v1, s2 4616; GFX7-NEXT: v_mov_b32_e32 v2, s1 4617; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4618; GFX7-NEXT: s_endpgm 4619; 4620; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: 4621; GFX10-WGP: ; %bb.0: ; %entry 4622; 
GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4623; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4624; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4625; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4626; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4627; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4628; GFX10-WGP-NEXT: s_endpgm 4629; 4630; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: 4631; GFX10-CU: ; %bb.0: ; %entry 4632; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4633; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4634; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4635; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4636; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4637; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4638; GFX10-CU-NEXT: s_endpgm 4639; 4640; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: 4641; SKIP-CACHE-INV: ; %bb.0: ; %entry 4642; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4643; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4644; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4645; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4646; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4647; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4648; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4649; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4650; SKIP-CACHE-INV-NEXT: s_endpgm 4651; 4652; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: 4653; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4654; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4655; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4656; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4657; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4658; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4659; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4660; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4661; 4662; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: 4663; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4664; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x0 4665; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4666; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4667; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4668; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4669; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4670; GFX90A-TGSPLIT-NEXT: s_endpgm 4671; 4672; 4673 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4674entry: 4675 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4676 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic 4677 ret void 4678} 4679 4680define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg( 4681; GFX6-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: 4682; GFX6: ; %bb.0: ; %entry 4683; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 4684; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 4685; GFX6-NEXT: s_mov_b32 m0, -1 4686; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4687; GFX6-NEXT: v_mov_b32_e32 v0, s2 4688; GFX6-NEXT: v_mov_b32_e32 v1, s1 4689; GFX6-NEXT: v_mov_b32_e32 v2, s0 4690; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4691; GFX6-NEXT: s_endpgm 4692; 4693; GFX7-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: 4694; GFX7: ; %bb.0: ; %entry 4695; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4696; GFX7-NEXT: s_mov_b32 m0, -1 4697; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4698; GFX7-NEXT: v_mov_b32_e32 v0, s0 4699; GFX7-NEXT: v_mov_b32_e32 v1, s2 4700; GFX7-NEXT: v_mov_b32_e32 v2, s1 4701; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4702; GFX7-NEXT: s_endpgm 4703; 4704; GFX10-WGP-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: 4705; GFX10-WGP: ; %bb.0: ; %entry 4706; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4707; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4708; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4709; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4710; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4711; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4712; GFX10-WGP-NEXT: s_endpgm 4713; 4714; 
GFX10-CU-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: 4715; GFX10-CU: ; %bb.0: ; %entry 4716; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4717; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4718; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4719; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4720; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4721; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4722; GFX10-CU-NEXT: s_endpgm 4723; 4724; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: 4725; SKIP-CACHE-INV: ; %bb.0: ; %entry 4726; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4727; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4728; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4729; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4730; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4731; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4732; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4733; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4734; SKIP-CACHE-INV-NEXT: s_endpgm 4735; 4736; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: 4737; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4738; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4739; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4740; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4741; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4742; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4743; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4744; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4745; 4746; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: 4747; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4748; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4749; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4750; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 4751; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4752; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 4753; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4754; GFX90A-TGSPLIT-NEXT: s_endpgm 4755; 4756; 4757 i32 addrspace(3)* 
%out, i32 %in, i32 %old) { 4758entry: 4759 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4760 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire 4761 ret void 4762} 4763 4764define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg( 4765; GFX6-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: 4766; GFX6: ; %bb.0: ; %entry 4767; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 4768; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 4769; GFX6-NEXT: s_mov_b32 m0, -1 4770; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4771; GFX6-NEXT: v_mov_b32_e32 v0, s2 4772; GFX6-NEXT: v_mov_b32_e32 v1, s1 4773; GFX6-NEXT: v_mov_b32_e32 v2, s0 4774; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4775; GFX6-NEXT: s_endpgm 4776; 4777; GFX7-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: 4778; GFX7: ; %bb.0: ; %entry 4779; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4780; GFX7-NEXT: s_mov_b32 m0, -1 4781; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4782; GFX7-NEXT: v_mov_b32_e32 v0, s0 4783; GFX7-NEXT: v_mov_b32_e32 v1, s2 4784; GFX7-NEXT: v_mov_b32_e32 v2, s1 4785; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4786; GFX7-NEXT: s_endpgm 4787; 4788; GFX10-WGP-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: 4789; GFX10-WGP: ; %bb.0: ; %entry 4790; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4791; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4792; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4793; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4794; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4795; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4796; GFX10-WGP-NEXT: s_endpgm 4797; 4798; GFX10-CU-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: 4799; GFX10-CU: ; %bb.0: ; %entry 4800; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4801; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4802; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4803; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4804; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4805; GFX10-CU-NEXT: 
ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering acq_rel, failure ordering
; acquire; the result is unused. %gep is 4 x i32 past %out, which appears as
; offset:16 in the checks. Every check prefix expects a plain ds_cmpst_b32
; followed directly by s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0,
-1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering seq_cst, failure ordering
; acquire; the result is unused. %gep is 4 x i32 past %out, which appears as
; offset:16 in the checks. Every check prefix expects a plain ds_cmpst_b32
; followed directly by s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL:
local_singlethread_one_as_seq_cst_acquire_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering seq_cst, failure ordering
; seq_cst; the result is unused. %gep is 4 x i32 past %out, which appears as
; offset:16 in the checks. Every check prefix expects a plain ds_cmpst_b32
; followed directly by s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:
v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering acquire, failure ordering
; monotonic; field 0 of the result pair is stored to %out. %gep is 4 x i32
; past %out, which appears as offset:16 in the checks. Every check prefix
; expects the tail ds_cmpst_rtn_b32, s_waitcnt lgkmcnt(0), ds_write_b32,
; s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:
s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering acq_rel, failure ordering
; monotonic; field 0 of the result pair is stored to %out. %gep is 4 x i32
; past %out, which appears as offset:16 in the checks. Every check prefix
; expects the tail ds_cmpst_rtn_b32, s_waitcnt lgkmcnt(0), ds_write_b32,
; s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
;
SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering seq_cst, failure ordering
; monotonic; field 0 of the result pair is stored to %out. %gep is 4 x i32
; past %out, which appears as offset:16 in the checks. Every check prefix
; expects the tail ds_cmpst_rtn_b32, s_waitcnt lgkmcnt(0), ds_write_b32,
; s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering acquire, failure ordering
; acquire; field 0 of the result pair is stored to %out. %gep is 4 x i32
; past %out, which appears as offset:16 in the checks. Every check prefix
; expects the tail ds_cmpst_rtn_b32, s_waitcnt lgkmcnt(0), ds_write_b32,
; s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
;
GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-exchange on a local (addrspace(3)) pointer at
; syncscope("singlethread-one-as"), success ordering release, failure ordering
; acquire; field 0 of the result pair is stored to %out. %gep is 4 x i32
; past %out, which appears as offset:16 in the checks. Every check prefix
; expects the tail ds_cmpst_rtn_b32, s_waitcnt lgkmcnt(0), ds_write_b32,
; s_endpgm.
define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-NOTTGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s0
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s1
; GFX90A-TGSPLIT-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    ds_write_b32 v0, v1
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
;
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX6-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT:    s_waitcnt
lgkmcnt(0) 5663; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 5664; SKIP-CACHE-INV-NEXT: s_endpgm 5665; 5666; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: 5667; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5668; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5669; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5670; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 5671; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5672; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 5673; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5674; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5675; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 5676; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5677; 5678; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: 5679; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5680; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5681; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5682; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 5683; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5684; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 5685; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5686; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5687; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 5688; GFX90A-TGSPLIT-NEXT: s_endpgm 5689; 5690; 5691 i32 addrspace(3)* %out, i32 %in, i32 %old) { 5692entry: 5693 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 5694 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire 5695 %val0 = extractvalue { i32, i1 } %val, 0 5696 store i32 %val0, i32 addrspace(3)* %out, align 4 5697 ret void 5698} 5699 5700define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg( 5701; GFX6-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: 5702; GFX6: ; %bb.0: ; %entry 5703; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 5704; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 5705; GFX6-NEXT: s_mov_b32 m0, -1 5706; 
GFX6-NEXT: s_waitcnt lgkmcnt(0) 5707; GFX6-NEXT: v_mov_b32_e32 v0, s2 5708; GFX6-NEXT: v_mov_b32_e32 v1, s1 5709; GFX6-NEXT: v_mov_b32_e32 v2, s0 5710; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5711; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5712; GFX6-NEXT: ds_write_b32 v0, v1 5713; GFX6-NEXT: s_endpgm 5714; 5715; GFX7-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: 5716; GFX7: ; %bb.0: ; %entry 5717; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5718; GFX7-NEXT: s_mov_b32 m0, -1 5719; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5720; GFX7-NEXT: v_mov_b32_e32 v0, s0 5721; GFX7-NEXT: v_mov_b32_e32 v1, s2 5722; GFX7-NEXT: v_mov_b32_e32 v2, s1 5723; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5724; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5725; GFX7-NEXT: ds_write_b32 v0, v1 5726; GFX7-NEXT: s_endpgm 5727; 5728; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: 5729; GFX10-WGP: ; %bb.0: ; %entry 5730; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5731; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5732; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5733; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 5734; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 5735; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5736; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5737; GFX10-WGP-NEXT: ds_write_b32 v0, v1 5738; GFX10-WGP-NEXT: s_endpgm 5739; 5740; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: 5741; GFX10-CU: ; %bb.0: ; %entry 5742; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5743; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5744; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5745; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 5746; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 5747; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5748; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5749; GFX10-CU-NEXT: ds_write_b32 v0, v1 5750; GFX10-CU-NEXT: s_endpgm 5751; 5752; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: 5753; SKIP-CACHE-INV: ; %bb.0: ; %entry 5754; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 5755; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 5756; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 5757; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5758; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 5759; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 5760; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 5761; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5762; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5763; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 5764; SKIP-CACHE-INV-NEXT: s_endpgm 5765; 5766; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: 5767; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5768; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5769; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5770; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 5771; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5772; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 5773; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5774; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5775; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 5776; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5777; 5778; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: 5779; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5780; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5781; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5782; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 5783; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5784; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 5785; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5786; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5787; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 5788; GFX90A-TGSPLIT-NEXT: s_endpgm 5789; 5790; 5791 i32 addrspace(3)* %out, i32 %in, i32 %old) { 5792entry: 5793 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 5794 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst 
acquire 5795 %val0 = extractvalue { i32, i1 } %val, 0 5796 store i32 %val0, i32 addrspace(3)* %out, align 4 5797 ret void 5798} 5799 5800define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg( 5801; GFX6-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: 5802; GFX6: ; %bb.0: ; %entry 5803; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 5804; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 5805; GFX6-NEXT: s_mov_b32 m0, -1 5806; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5807; GFX6-NEXT: v_mov_b32_e32 v0, s2 5808; GFX6-NEXT: v_mov_b32_e32 v1, s1 5809; GFX6-NEXT: v_mov_b32_e32 v2, s0 5810; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5811; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5812; GFX6-NEXT: ds_write_b32 v0, v1 5813; GFX6-NEXT: s_endpgm 5814; 5815; GFX7-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: 5816; GFX7: ; %bb.0: ; %entry 5817; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5818; GFX7-NEXT: s_mov_b32 m0, -1 5819; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5820; GFX7-NEXT: v_mov_b32_e32 v0, s0 5821; GFX7-NEXT: v_mov_b32_e32 v1, s2 5822; GFX7-NEXT: v_mov_b32_e32 v2, s1 5823; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5824; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5825; GFX7-NEXT: ds_write_b32 v0, v1 5826; GFX7-NEXT: s_endpgm 5827; 5828; GFX10-WGP-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: 5829; GFX10-WGP: ; %bb.0: ; %entry 5830; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5831; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5832; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5833; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 5834; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 5835; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5836; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5837; GFX10-WGP-NEXT: ds_write_b32 v0, v1 5838; GFX10-WGP-NEXT: s_endpgm 5839; 5840; GFX10-CU-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: 5841; GFX10-CU: ; %bb.0: ; %entry 5842; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5843; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) 5844; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5845; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 5846; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 5847; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5848; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5849; GFX10-CU-NEXT: ds_write_b32 v0, v1 5850; GFX10-CU-NEXT: s_endpgm 5851; 5852; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: 5853; SKIP-CACHE-INV: ; %bb.0: ; %entry 5854; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 5855; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 5856; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 5857; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5858; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 5859; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 5860; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 5861; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5862; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5863; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 5864; SKIP-CACHE-INV-NEXT: s_endpgm 5865; 5866; GFX90A-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: 5867; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5868; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5869; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5870; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 5871; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5872; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 5873; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5874; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5875; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 5876; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5877; 5878; GFX90A-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: 5879; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5880; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5881; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5882; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 5883; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5884; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 
v2, s1 5885; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 5886; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5887; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 5888; GFX90A-TGSPLIT-NEXT: s_endpgm 5889; 5890; 5891 i32 addrspace(3)* %out, i32 %in, i32 %old) { 5892entry: 5893 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 5894 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst 5895 %val0 = extractvalue { i32, i1 } %val, 0 5896 store i32 %val0, i32 addrspace(3)* %out, align 4 5897 ret void 5898} 5899 5900