1; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
4
; atomicrmw add on the i64 four elements (32 bytes) past %out; the result is unused.
; Every run line is expected to fold the constant into an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_add_i64_offset:
; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile add i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
15
; atomicrmw add on the i64 four elements (32 bytes) past %out, with the value it
; returns stored to %out2; the checks expect the glc form with an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %old = atomicrmw volatile add i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
28
; atomicrmw add on the i64 at element %index + 4 of %out (result unused). The bonaire
; and gfx900 checks expect an offset:32 immediate; the tonga check expects a flat
; atomic with no offset operand.
; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %unused = atomicrmw volatile add i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
40
; atomicrmw add on the i64 at element %index + 4 of %out, with the returned value
; stored to %out2. All checks expect glc; bonaire and gfx900 also expect offset:32.
; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %old = atomicrmw volatile add i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
55
; atomicrmw add directly on %out (result unused); the checks expect no offset
; immediate and no trailing modifiers. The buffer form is checked on both the
; bonaire and tonga runs.
; GCN-LABEL: {{^}}atomic_add_i64:
; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
64
; atomicrmw add directly on %out with the returned value stored to %out2; the
; checks expect the glc form with nothing after it on the line.
; GCN-LABEL: {{^}}atomic_add_i64_ret:
; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
76
; atomicrmw add on the i64 at element %index of %out (result unused). bonaire is
; expected to use the addr64 buffer form, tonga a flat atomic, gfx900 a global atomic.
; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile add i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
87
; atomicrmw add on the i64 at element %index of %out, with the returned value stored
; to %out2; each target's check expects the glc form of its atomic.
; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %old = atomicrmw volatile add i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
101
; atomicrmw and on the i64 four elements (32 bytes) past %out; the result is unused.
; Every run line is expected to fold the constant into an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_and_i64_offset:
; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile and i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
111
; atomicrmw and on the i64 four elements (32 bytes) past %out, with the value it
; returns stored to %out2; the checks expect the glc form with an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %old = atomicrmw volatile and i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
124
; atomicrmw and on the i64 at element %index + 4 of %out (result unused). The bonaire
; and gfx900 checks expect an offset:32 immediate; the tonga check expects a flat
; atomic with no offset operand.
; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %unused = atomicrmw volatile and i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
136
; atomicrmw and on the i64 at element %index + 4 of %out, with the returned value
; stored to %out2. All checks expect glc; bonaire and gfx900 also expect offset:32.
; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %old = atomicrmw volatile and i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
151
; atomicrmw and directly on %out (result unused); the checks expect no offset
; immediate and no trailing modifiers.
; GCN-LABEL: {{^}}atomic_and_i64:
; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
160
; atomicrmw and directly on %out with the returned value stored to %out2; the
; checks expect the glc form with nothing after it on the line.
; GCN-LABEL: {{^}}atomic_and_i64_ret:
; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
172
; atomicrmw and on the i64 at element %index of %out (result unused). bonaire is
; expected to use the addr64 buffer form, tonga a flat atomic, gfx900 a global atomic.
; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile and i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
183
; atomicrmw and on the i64 at element %index of %out, with the returned value stored
; to %out2; each target's check expects the glc form of its atomic.
; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %old = atomicrmw volatile and i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
197
; atomicrmw sub on the i64 four elements (32 bytes) past %out; the result is unused.
; Every run line is expected to fold the constant into an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile sub i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
207
; atomicrmw sub on the i64 four elements (32 bytes) past %out, with the value it
; returns stored to %out2; the checks expect the glc form with an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %old = atomicrmw volatile sub i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
220
; atomicrmw sub on the i64 at element %index + 4 of %out (result unused). The bonaire
; and gfx900 checks expect an offset:32 immediate; the tonga check expects a flat
; atomic with no offset operand.
; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %unused = atomicrmw volatile sub i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
232
; atomicrmw sub on the i64 at element %index + 4 of %out, with the returned value
; stored to %out2. All checks expect glc; bonaire and gfx900 also expect offset:32.
; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %old = atomicrmw volatile sub i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
247
; atomicrmw sub directly on %out (result unused); the checks expect no offset
; immediate and no trailing modifiers.
; GCN-LABEL: {{^}}atomic_sub_i64:
; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
256
; atomicrmw sub directly on %out with the returned value stored to %out2; the
; checks expect the glc form with nothing after it on the line.
; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
268
; atomicrmw sub on the i64 at element %index of %out (result unused). bonaire is
; expected to use the addr64 buffer form, tonga a flat atomic, gfx900 a global atomic.
; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile sub i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
279
; atomicrmw sub on the i64 at element %index of %out, with the returned value stored
; to %out2; each target's check expects the glc form of its atomic.
; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %old = atomicrmw volatile sub i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
293
; atomicrmw max (checked as the signed smax instruction) on the i64 four elements
; (32 bytes) past %out; the result is unused. Every run line is expected to fold
; the constant into an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_max_i64_offset:
; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile max i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
303
; atomicrmw max (checked as smax) on the i64 four elements (32 bytes) past %out,
; with the value it returns stored to %out2; the checks expect the glc form with
; an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %old = atomicrmw volatile max i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
316
; atomicrmw max (checked as smax) on the i64 at element %index + 4 of %out (result
; unused). The bonaire and gfx900 checks expect an offset:32 immediate; the tonga
; check expects a flat atomic with no offset operand.
; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %unused = atomicrmw volatile max i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
328
; atomicrmw max (checked as smax) on the i64 at element %index + 4 of %out, with the
; returned value stored to %out2. All checks expect glc; bonaire and gfx900 also
; expect offset:32.
; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %old = atomicrmw volatile max i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
343
; atomicrmw max (checked as smax) directly on %out (result unused); the checks
; expect no offset immediate and no trailing modifiers.
; GCN-LABEL: {{^}}atomic_max_i64:
; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
352
; atomicrmw max (checked as smax) directly on %out with the returned value stored
; to %out2; the checks expect the glc form with nothing after it on the line.
; GCN-LABEL: {{^}}atomic_max_i64_ret:
; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
364
; atomicrmw max (checked as smax) on the i64 at element %index of %out (result
; unused). bonaire is expected to use the addr64 buffer form, tonga a flat atomic,
; gfx900 a global atomic.
; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile max i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
375
; atomicrmw max (checked as smax) on the i64 at element %index of %out, with the
; returned value stored to %out2; each target's check expects the glc form.
; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %old = atomicrmw volatile max i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
389
; atomicrmw umax on the i64 four elements (32 bytes) past %out; the result is unused.
; Every run line is expected to fold the constant into an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umax_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile umax i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
399
; atomicrmw umax on the i64 four elements (32 bytes) past %out, with the value it
; returns stored to %out2; the checks expect the glc form with an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %old = atomicrmw volatile umax i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
412
; atomicrmw umax on the i64 at element %index + 4 of %out (result unused). The bonaire
; and gfx900 checks expect an offset:32 immediate; the tonga check expects a flat
; atomic with no offset operand.
; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %unused = atomicrmw volatile umax i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
424
; atomicrmw umax on the i64 at element %index + 4 of %out, with the returned value
; stored to %out2. All checks expect glc; bonaire and gfx900 also expect offset:32.
; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %old = atomicrmw volatile umax i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
439
; atomicrmw umax directly on %out (result unused); the checks expect no offset
; immediate and no trailing modifiers.
; GCN-LABEL: {{^}}atomic_umax_i64:
; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
448
; atomicrmw umax directly on %out with the returned value stored to %out2; the
; checks expect the glc form with nothing after it on the line.
; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
460
; atomicrmw umax on the i64 at element %index of %out (result unused). bonaire is
; expected to use the addr64 buffer form, tonga a flat atomic, gfx900 a global atomic.
; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile umax i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
471
; atomicrmw umax on the i64 at element %index of %out, with the returned value stored
; to %out2; each target's check expects the glc form of its atomic.
; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %old = atomicrmw volatile umax i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
485
; atomicrmw min (checked as the signed smin instruction) on the i64 four elements
; (32 bytes) past %out; the result is unused. Every run line is expected to fold
; the constant into an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_min_i64_offset:
; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile min i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
495
; atomicrmw min (checked as smin) on the i64 four elements (32 bytes) past %out,
; with the value it returns stored to %out2; the checks expect the glc form with
; an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %old = atomicrmw volatile min i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
508
; atomicrmw min (checked as smin) on the i64 at element %index + 4 of %out (result
; unused). The bonaire and gfx900 checks expect an offset:32 immediate; the tonga
; check expects a flat atomic with no offset operand.
; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %unused = atomicrmw volatile min i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
520
; atomicrmw min (checked as smin) on the i64 at element %index + 4 of %out, with the
; returned value stored to %out2. All checks expect glc; bonaire and gfx900 also
; expect offset:32.
; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %base, i64 4
  %old = atomicrmw volatile min i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
535
; atomicrmw min (checked as smin) directly on %out (result unused); the checks
; expect no offset immediate and no trailing modifiers.
; GCN-LABEL: {{^}}atomic_min_i64:
; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
544
; atomicrmw min (checked as smin) directly on %out with the returned value stored
; to %out2; the checks expect the glc form with nothing after it on the line.
; GCN-LABEL: {{^}}atomic_min_i64_ret:
; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
556
; atomicrmw min (checked as smin) on the i64 at element %index of %out (result
; unused). bonaire is expected to use the addr64 buffer form, tonga a flat atomic,
; gfx900 a global atomic.
; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile min i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
567
; atomicrmw min (checked as smin) on the i64 at element %index of %out, with the
; returned value stored to %out2; each target's check expects the glc form.
; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]
; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %old = atomicrmw volatile min i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %old, i64 addrspace(1)* %out2
  ret void
}
581
; atomicrmw umin on the i64 four elements (32 bytes) past %out; the result is unused.
; Every run line is expected to fold the constant into an offset:32 immediate.
; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile umin i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
592
; Unsigned min four i64 elements (32 bytes) past %out; the returned value is
; stored to %out2.
; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %prev = atomicrmw volatile umin i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
605
; Unsigned min at element %index + 4 of %out (variable index plus a constant
; 32-byte displacement); the returned value is not used.
; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %unused = atomicrmw volatile umin i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
617
; Unsigned min at element %index + 4 of %out; the returned value is stored to
; %out2.
; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %prev = atomicrmw volatile umin i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
632
; Unsigned min applied directly to %out; the returned value is not used.
; GCN-LABEL: {{^}}atomic_umin_i64:
; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
641
; Unsigned min applied directly to %out; the returned value is stored to
; %out2. The buffer pattern is anchored at end of line like its siblings so
; that no unexpected trailing modifier can slip through.
; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}
653
; Unsigned min on the element of %out selected by %index; the returned value
; is not used.
; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile umin i64 addrspace(1)* %elt, i64 %in seq_cst
  ret void
}
664
; Unsigned min on the element of %out selected by %index; the returned value
; is stored to %out2.
; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %prev = atomicrmw volatile umin i64 addrspace(1)* %elt, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
678
; Bitwise or four i64 elements (32 bytes) past %out; the returned value is not
; used.
; GCN-LABEL: {{^}}atomic_or_i64_offset:
; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile or i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
688
; Bitwise or four i64 elements (32 bytes) past %out; the returned value is
; stored to %out2.
; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %prev = atomicrmw volatile or i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
701
; Bitwise or at element %index + 4 of %out; the returned value is not used.
; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %unused = atomicrmw volatile or i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
713
; Bitwise or at element %index + 4 of %out; the returned value is stored to
; %out2.
; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %prev = atomicrmw volatile or i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
728
; Bitwise or applied directly to %out; the returned value is not used.
; GCN-LABEL: {{^}}atomic_or_i64:
; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_or_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
737
; Bitwise or applied directly to %out; the returned value is stored to %out2.
; The buffer pattern is anchored at end of line like its siblings.
; GCN-LABEL: {{^}}atomic_or_i64_ret:
; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_or_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}
749
; Bitwise or on the element of %out selected by %index; the returned value is
; not used.
; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile or i64 addrspace(1)* %elt, i64 %in seq_cst
  ret void
}
760
; Bitwise or on the element of %out selected by %index; the returned value is
; stored to %out2.
; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %prev = atomicrmw volatile or i64 addrspace(1)* %elt, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
774
; Exchange four i64 elements (32 bytes) past %out; the returned value is not
; used.
; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}

; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile xchg i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
785
; Exchange of a double four elements (32 bytes) past %out; the same swap
; instructions are expected as for the i64 variant.
; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}

; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_xchg_f64_offset(double addrspace(1)* %out, double %in) {
entry:
  %addr = getelementptr double, double addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile xchg double addrspace(1)* %addr, double %in seq_cst
  ret void
}
796
; Exchange four i64 elements (32 bytes) past %out; the returned value is
; stored to %out2.
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %prev = atomicrmw volatile xchg i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
809
; Exchange at element %index + 4 of %out; the returned value is not used.
; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %unused = atomicrmw volatile xchg i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
821
; Exchange at element %index + 4 of %out; the returned value is stored to
; %out2.
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %prev = atomicrmw volatile xchg i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
836
; Exchange applied directly to %out; the returned value is not used.
; GCN-LABEL: {{^}}atomic_xchg_i64:
; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_xchg_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
845
; Exchange applied directly to %out; the returned value is stored to %out2.
; The buffer pattern is anchored at end of line like its siblings.
; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_xchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}
857
; Exchange on the element of %out selected by %index; the returned value is
; not used.
; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile xchg i64 addrspace(1)* %elt, i64 %in seq_cst
  ret void
}
868
; Exchange on the element of %out selected by %index; the returned value is
; stored to %out2.
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}
882
; Bitwise xor four i64 elements (32 bytes) past %out; the returned value is
; not used.
; GCN-LABEL: {{^}}atomic_xor_i64_offset:
; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_xor_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %unused = atomicrmw volatile xor i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
892
; Bitwise xor four i64 elements (32 bytes) past %out; the returned value is
; stored to %out2.
; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %prev = atomicrmw volatile xor i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
905
; Bitwise xor at element %index + 4 of %out; the returned value is not used.
; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %unused = atomicrmw volatile xor i64 addrspace(1)* %addr, i64 %in seq_cst
  ret void
}
917
; Bitwise xor at element %index + 4 of %out; the returned value is stored to
; %out2.
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %prev = atomicrmw volatile xor i64 addrspace(1)* %addr, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
932
; Bitwise xor applied directly to %out; the returned value is not used.
; GCN-LABEL: {{^}}atomic_xor_i64:
; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_xor_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %unused = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}
941
; Bitwise xor applied directly to %out; the returned value is stored to %out2.
; The buffer pattern is anchored at end of line like its siblings.
; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_xor_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}
953
; Bitwise xor on the element of %out selected by %index; the returned value is
; not used.
; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %unused = atomicrmw volatile xor i64 addrspace(1)* %elt, i64 %in seq_cst
  ret void
}
964
; Bitwise xor on the element of %out selected by %index; the returned value is
; stored to %out2.
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %prev = atomicrmw volatile xor i64 addrspace(1)* %elt, i64 %in seq_cst
  store i64 %prev, i64 addrspace(1)* %out2
  ret void
}
978
979
; Compare-and-swap four i64 elements (32 bytes) past %out, comparing against
; %old and writing %in; the result pair is not used.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %pair = cmpxchg volatile i64 addrspace(1)* %addr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
989
; Compare-and-swap 9000 i64 elements past %out. The displacement is
; 9000 * 8 = 72000 bytes = 0x11940, expected either whole in a scalar register
; or split as 0x11000 in a vector register plus an immediate offset of 2368.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; CIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x11940
; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}

; GFX9: v_mov_b32_e32 [[VOFFSET:v[0-9]+]], 0x11000{{$}}
; GFX9: global_atomic_cmpswap_x2 [[VOFFSET]], v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:2368{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 9000
  %pair = cmpxchg volatile i64 addrspace(1)* %addr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
1002
; Compare-and-swap four i64 elements (32 bytes) past %out; field 0 of the
; result pair (the loaded i64) is stored to %out2.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:

; GFX9: global_atomic_cmpswap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
entry:
  %addr = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %pair = cmpxchg volatile i64 addrspace(1)* %addr, i64 %old, i64 %in seq_cst seq_cst
  %loaded = extractvalue { i64, i1 } %pair, 0
  store i64 %loaded, i64 addrspace(1)* %out2
  ret void
}
1016
; Compare-and-swap at element %index + 4 of %out; the result pair is not used.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %pair = cmpxchg volatile i64 addrspace(1)* %addr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
1028
; Compare-and-swap at element %index + 4 of %out; field 0 of the result pair
; (the loaded i64) is stored to %out2.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:

; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %pair = cmpxchg volatile i64 addrspace(1)* %addr, i64 %old, i64 %in seq_cst seq_cst
  %loaded = extractvalue { i64, i1 } %pair, 0
  store i64 %loaded, i64 addrspace(1)* %out2
  ret void
}
1044
; Compare-and-swap applied directly to %out; the result pair is not used.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64(i64 addrspace(1)* %out, i64 %in, i64 %old) {
entry:
  %pair = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
1053
; Compare-and-swap applied directly to %out; field 0 of the result pair (the
; loaded i64) is stored to %out2. The buffer pattern is anchored at end of
; line like its siblings.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:

; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64 addrspace(1)* %out2
  ret void
}
1066
; Compare-and-swap on the element of %out selected by %index; the result pair
; is not used.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %pair = cmpxchg volatile i64 addrspace(1)* %elt, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
1077
; Compare-and-swap on the element of %out selected by %index; field 0 of the
; result pair (the loaded i64) is stored to %out2.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:

; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %pair = cmpxchg volatile i64 addrspace(1)* %elt, i64 %old, i64 %in seq_cst seq_cst
  %loaded = extractvalue { i64, i1 } %pair, 0
  store i64 %loaded, i64 addrspace(1)* %out2
  ret void
}
1092
; seq_cst atomic load four i64 elements (32 bytes) past %in; the loaded value
; is stored to %out. Both halves of every register-pair pattern accept
; multi-digit register numbers.
; GCN-LABEL: {{^}}atomic_load_i64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %in, i64 4
  %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
1106
; seq_cst atomic load four i64 elements before %in (a -32 byte displacement).
; The patterns expect the displacement as a 0xffffffe0 / -1 register pair, as
; a scalar add with carry, or folded into an immediate offset of -32. Both
; halves of every register-pair pattern accept multi-digit register numbers.
; GCN-LABEL: {{^}}atomic_load_i64_neg_offset:
; CI: v_mov_b32_e32 v[[LO:[0-9]+]], 0xffffffe0
; CI: v_mov_b32_e32 v[[HI:[0-9]+]], -1
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffffffe0
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_neg_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %in, i64 -4
  %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
1126
; seq_cst atomic load directly from %in; the loaded value is stored to %out.
; Register-pair patterns accept multi-digit register numbers and every load
; pattern is anchored at end of line.
; GCN-LABEL: {{^}}atomic_load_i64:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
entry:
  %val = load atomic i64, i64 addrspace(1)* %in seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
1139
; seq_cst atomic load from element %index + 4 of %in; the loaded value is
; stored to %out.
; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  %elt = getelementptr i64, i64 addrspace(1)* %in, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %elt, i64 4
  %loaded = load atomic i64, i64 addrspace(1)* %addr seq_cst, align 8
  store i64 %loaded, i64 addrspace(1)* %out
  ret void
}
1154
; seq_cst atomic load from the element of %in selected by %index; the loaded
; value is stored to %out. Both halves of every register-pair pattern accept
; multi-digit register numbers.
; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
  %val = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}
1168
; seq_cst atomic load of a double from element %index + 4 of %in; the loaded
; value is stored to %out.
; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double addrspace(1)* %in, double addrspace(1)* %out, i64 %index) {
entry:
  %elt = getelementptr double, double addrspace(1)* %in, i64 %index
  %addr = getelementptr double, double addrspace(1)* %elt, i64 4
  %loaded = load atomic double, double addrspace(1)* %addr seq_cst, align 8
  store double %loaded, double addrspace(1)* %out
  ret void
}
1183
; Seq_cst atomic i64 store of %in to %out[4], i.e. a constant 32-byte offset
; from the kernel argument pointer.
; No register captures are defined: nothing in this test refers back to a
; matched register, so anonymous patterns are sufficient.
; Register-pair patterns accept multi-digit indices on both ends of the range.
; GCN-LABEL: {{^}}atomic_store_i64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
entry:
  ; 4 x i64 = 32 bytes, the offset the bonaire and gfx900 patterns expect.
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
  ret void
}
1194
; Seq_cst atomic i64 store of %in directly to the kernel argument pointer %out,
; with no index and no constant offset.
; Expected selection: bonaire uses a buffer store with "off" addressing, tonga
; a flat store and gfx900 a global store.
; Register-pair patterns accept multi-digit indices on both ends of the range.
; GCN-LABEL: {{^}}atomic_store_i64:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
entry:
  store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
  ret void
}
1204
; Seq_cst atomic i64 store of %in to %out[%index + 4]: a variable index
; combined with a constant 32-byte offset.
; The bonaire and gfx900 patterns expect the constant part as offset:32; the
; tonga flat-store pattern has no offset field.
; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  ; Variable index, then a constant step of 4 x i64.
  %indexed = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %addr = getelementptr i64, i64 addrspace(1)* %indexed, i64 4
  store atomic i64 %in, i64 addrspace(1)* %addr seq_cst, align 8
  ret void
}
1216
; Seq_cst atomic i64 store of %in to %out[%index], with a variable index and
; no constant offset.
; Expected selection: bonaire uses an addr64 buffer store, tonga a flat store
; and gfx900 a global store.
; Register-pair patterns accept multi-digit indices on both ends of the range.
; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  ; Only the variable index contributes to the address here.
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
  ret void
}
1227
; Double-precision counterpart of the indexed-plus-offset atomic store: writes
; %in to %out[%index + 4] with seq_cst ordering.
; The bonaire and gfx900 patterns expect the constant part as offset:32; the
; tonga flat-store pattern has no offset field.
; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double addrspace(1)* %out, i64 %index) {
entry:
  ; Variable index, then a constant step of 4 doubles (32 bytes).
  %indexed = getelementptr double, double addrspace(1)* %out, i64 %index
  %addr = getelementptr double, double addrspace(1)* %indexed, i64 4
  store atomic double %in, double addrspace(1)* %addr seq_cst, align 8
  ret void
}
1239