; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s

; Each kernel below performs one volatile cmpxchg on a pointer derived from
; %out, and the checks pin which instructions llc places directly around the
; resulting flat_atomic_cmpswap for that syncscope and success/failure
; ordering pair: an "s_waitcnt vmcnt(0)" before it, and/or an
; "s_waitcnt vmcnt(0)" followed by "buffer_wbinvl1_vol" after it.

; ----------------------------------------------------------------------------
; No syncscope qualifier (kernels named system_*).
; The checks expect a wait before the cmpswap once the success ordering is
; release or stronger, and a wait plus cache invalidate after it once either
; ordering is acquire or stronger.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}system_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}system_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}system_release_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
  ret void
}

; GCN-LABEL: {{^}}system_acq_rel_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}system_seq_cst_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}system_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
  ret void
}

; GCN-LABEL: {{^}}system_release_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
  ret void
}

; GCN-LABEL: {{^}}system_acq_rel_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}system_seq_cst_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("singlethread").
; For every ordering pair the checks expect neither "s_waitcnt vmcnt(0)" nor
; "buffer_wbinvl1_vol" around the cmpswap.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_release_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_release_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("agent").
; The expected sequences mirror the kernels that carry no syncscope qualifier.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}agent_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_release_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}agent_release_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
  ret void
}

; GCN-LABEL: {{^}}agent_acq_rel_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}agent_seq_cst_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("workgroup").
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
  ret void
}

; For the remaining syncscope("workgroup") kernels the checks expect neither
; "s_waitcnt vmcnt(0)" nor "buffer_wbinvl1_vol" around the cmpswap, whatever
; the ordering pair.

; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_release_monotonic:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_release_acquire:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("wavefront").
; For every ordering pair the checks expect neither "s_waitcnt vmcnt(0)" nor
; "buffer_wbinvl1_vol" around the cmpswap.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_release_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
  ret void
}
; syncscope("wavefront") kernels with an acquire or seq_cst failure ordering.
; The checks expect neither "s_waitcnt vmcnt(0)" nor "buffer_wbinvl1_vol"
; around the cmpswap.

; GCN-LABEL: {{^}}wavefront_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_release_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
  ret void
}