1; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
3
4; GCN-LABEL: {{^}}system_monotonic_monotonic:
5; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
6; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
7; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
8; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering monotonic, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_monotonic_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in monotonic monotonic
  ret void
}
16
17; GCN-LABEL: {{^}}system_acquire_monotonic:
18; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
19; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
20; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
21; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering acquire, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_acquire_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in acquire monotonic
  ret void
}
29
30; GCN-LABEL: {{^}}system_release_monotonic:
31; GCN:        s_waitcnt vmcnt(0){{$}}
32; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
33; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
34; GCN-NOT:    buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering release, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_release_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in release monotonic
  ret void
}
42
43; GCN-LABEL: {{^}}system_acq_rel_monotonic:
44; GCN:        s_waitcnt vmcnt(0){{$}}
45; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
46; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
47; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering acq_rel, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_acq_rel_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in acq_rel monotonic
  ret void
}
55
56; GCN-LABEL: {{^}}system_seq_cst_monotonic:
57; GCN:        s_waitcnt vmcnt(0){{$}}
58; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
59; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
60; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering seq_cst, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_seq_cst_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in seq_cst monotonic
  ret void
}
68
69; GCN-LABEL: {{^}}system_acquire_acquire:
70; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
71; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
72; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
73; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering acquire, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_acquire_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in acquire acquire
  ret void
}
81
82; GCN-LABEL: {{^}}system_release_acquire:
83; GCN:        s_waitcnt vmcnt(0){{$}}
84; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
85; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
86; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering release, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_release_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in release acquire
  ret void
}
94
95; GCN-LABEL: {{^}}system_acq_rel_acquire:
96; GCN:        s_waitcnt vmcnt(0){{$}}
97; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
98; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
99; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering acq_rel, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_acq_rel_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in acq_rel acquire
  ret void
}
107
108; GCN-LABEL: {{^}}system_seq_cst_acquire:
109; GCN:        s_waitcnt vmcnt(0){{$}}
110; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
111; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
112; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering seq_cst, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_seq_cst_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in seq_cst acquire
  ret void
}
120
121; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
122; GCN:        s_waitcnt vmcnt(0){{$}}
123; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
124; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
125; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg with no syncscope qualifier (default scope);
; success ordering seq_cst, failure ordering seq_cst.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @system_seq_cst_seq_cst(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in seq_cst seq_cst
  ret void
}
133
134; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
135; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
136; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
137; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
138; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering monotonic, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_monotonic_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
  ret void
}
146
147; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
148; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
149; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
150; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
151; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering acquire, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_acquire_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
  ret void
}
159
160; GCN-LABEL: {{^}}singlethread_release_monotonic:
161; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
162; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
163; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
164; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering release, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_release_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") release monotonic
  ret void
}
172
173; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
174; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
175; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
176; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
177; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering acq_rel, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_acq_rel_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
  ret void
}
185
186; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
187; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
188; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
189; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
190; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering seq_cst, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_seq_cst_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
  ret void
}
198
199; GCN-LABEL: {{^}}singlethread_acquire_acquire:
200; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
201; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
202; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
203; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering acquire, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_acquire_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") acquire acquire
  ret void
}
211
212; GCN-LABEL: {{^}}singlethread_release_acquire:
213; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
214; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
215; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
216; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering release, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_release_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") release acquire
  ret void
}
224
225; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
226; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
227; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
228; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
229; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering acq_rel, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_acq_rel_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
  ret void
}
237
238; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
239; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
240; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
241; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
242; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering seq_cst, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_seq_cst_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
  ret void
}
250
251; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
252; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
253; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
254; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
255; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "singlethread";
; success ordering seq_cst, failure ordering seq_cst.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @singlethread_seq_cst_seq_cst(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
  ret void
}
263
264; GCN-LABEL: {{^}}agent_monotonic_monotonic:
265; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
266; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
267; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
268; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering monotonic, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_monotonic_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") monotonic monotonic
  ret void
}
276
277; GCN-LABEL: {{^}}agent_acquire_monotonic:
278; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
279; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
280; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
281; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering acquire, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_acquire_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") acquire monotonic
  ret void
}
289
290; GCN-LABEL: {{^}}agent_release_monotonic:
291; GCN:        s_waitcnt vmcnt(0){{$}}
292; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
293; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
294; GCN-NOT:    buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering release, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_release_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") release monotonic
  ret void
}
302
303; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
304; GCN:        s_waitcnt vmcnt(0){{$}}
305; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
306; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
307; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering acq_rel, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_acq_rel_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
  ret void
}
315
316; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
317; GCN:        s_waitcnt vmcnt(0){{$}}
318; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
319; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
320; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering seq_cst, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_seq_cst_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
  ret void
}
328
329; GCN-LABEL: {{^}}agent_acquire_acquire:
330; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
331; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
332; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
333; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering acquire, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_acquire_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") acquire acquire
  ret void
}
341
342; GCN-LABEL: {{^}}agent_release_acquire:
343; GCN:        s_waitcnt vmcnt(0){{$}}
344; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
345; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
346; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering release, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_release_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") release acquire
  ret void
}
354
355; GCN-LABEL: {{^}}agent_acq_rel_acquire:
356; GCN:        s_waitcnt vmcnt(0){{$}}
357; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
358; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
359; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering acq_rel, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_acq_rel_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") acq_rel acquire
  ret void
}
367
368; GCN-LABEL: {{^}}agent_seq_cst_acquire:
369; GCN:        s_waitcnt vmcnt(0){{$}}
370; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
371; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
372; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering seq_cst, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_seq_cst_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") seq_cst acquire
  ret void
}
380
381; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
382; GCN:        s_waitcnt vmcnt(0){{$}}
383; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
384; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
385; GFX8-NEXT:  buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "agent";
; success ordering seq_cst, failure ordering seq_cst.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @agent_seq_cst_seq_cst(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
  ret void
}
393
394; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
395; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
396; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
397; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
398; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering monotonic, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_monotonic_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
  ret void
}
406
407; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
408; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
409; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
410; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
411; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering acquire, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_acquire_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
  ret void
}
419
420; GCN-LABEL: {{^}}workgroup_release_monotonic:
421; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
422; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
423; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
424; GCN-NOT:    buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering release, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_release_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") release monotonic
  ret void
}
432
433; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
434; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
435; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
436; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
437; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering acq_rel, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_acq_rel_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
  ret void
}
445
446; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
447; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
448; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
449; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
450; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering seq_cst, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_seq_cst_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
  ret void
}
458
459; GCN-LABEL: {{^}}workgroup_acquire_acquire:
460; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
461; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
462; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
463; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering acquire, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_acquire_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") acquire acquire
  ret void
}
471
472; GCN-LABEL: {{^}}workgroup_release_acquire:
473; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
474; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
475; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
476; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering release, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_release_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") release acquire
  ret void
}
484
485; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
486; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
487; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
488; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
489; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering acq_rel, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_acq_rel_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
  ret void
}
497
498; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
499; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
500; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
501; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
502; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering seq_cst, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_seq_cst_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
  ret void
}
510
511; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
512; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
513; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
514; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
515; GFX8-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "workgroup";
; success ordering seq_cst, failure ordering seq_cst.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @workgroup_seq_cst_seq_cst(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
  ret void
}
523
524; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
525; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
526; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
527; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
528; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering monotonic, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_monotonic_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
  ret void
}
536
537; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
538; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
539; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
540; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
541; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering acquire, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_acquire_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
  ret void
}
549
550; GCN-LABEL: {{^}}wavefront_release_monotonic:
551; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
552; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
553; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
554; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering release, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_release_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") release monotonic
  ret void
}
562
563; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
564; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
565; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
566; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
567; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering acq_rel, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_acq_rel_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
  ret void
}
575
576; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
577; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
578; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
579; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
580; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering seq_cst, failure ordering monotonic.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_seq_cst_monotonic(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
  ret void
}
588
589; GCN-LABEL: {{^}}wavefront_acquire_acquire:
590; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
591; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
592; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
593; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering acquire, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_acquire_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") acquire acquire
  ret void
}
601
602; GCN-LABEL: {{^}}wavefront_release_acquire:
603; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
604; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
605; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
606; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering release, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_release_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") release acquire
  ret void
}
614
615; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
616; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
617; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
618; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
619; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering acq_rel, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_acq_rel_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
  ret void
}
627
628; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
629; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
630; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
631; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
632; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering seq_cst, failure ordering acquire.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_seq_cst_acquire(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
  ret void
}
640
641; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
642; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
643; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
644; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
645; GCN-NOT:   buffer_wbinvl1_vol
; Kernel: volatile, non-weak cmpxchg at syncscope "wavefront";
; success ordering seq_cst, failure ordering seq_cst.
; Targets element 4 of %out; the {i32, i1} result is not used.
define amdgpu_kernel void @wavefront_seq_cst_seq_cst(i32* %out, i32 %in, i32 %old) {
entry:
  %slot = getelementptr i32, i32* %out, i32 4
  %res = cmpxchg volatile i32* %slot, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
  ret void
}
653