1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; i64 atomicrmw add on flat (generic) pointers must select
; flat_atomic_add_x2.  The glc (return-data) bit must appear only in the
; *_ret_* variants, where the IR result is consumed and stored; in the
; non-ret variants the pattern anchors at end-of-line to prove no glc is
; emitted.  The _offset variants add a constant GEP, the _addr64 variants
; a variable index.

; GCN-LABEL: {{^}}atomic_add_i64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
83
; i64 atomicrmw and on flat pointers: selects flat_atomic_and_x2, with
; glc set only in the *_ret_* variants (old value consumed and stored).

; GCN-LABEL: {{^}}atomic_and_i64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile and i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile and i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile and i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile and i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) {
entry:
  %old = atomicrmw volatile and i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile and i64* %out, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile and i64* %base, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile and i64* %base, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}
163
; i64 atomicrmw sub on flat pointers: selects flat_atomic_sub_x2, with
; glc set only in the *_ret_* variants (old value consumed and stored).

; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile sub i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile sub i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile sub i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile sub i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) {
entry:
  %old = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile sub i64* %base, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile sub i64* %base, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}
243
; i64 atomicrmw max (signed) on flat pointers: selects flat_atomic_smax_x2,
; with glc set only in the *_ret_* variants (old value consumed and stored).

; GCN-LABEL: {{^}}atomic_max_i64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile max i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile max i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile max i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile max i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) {
entry:
  %old = atomicrmw volatile max i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile max i64* %out, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile max i64* %base, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile max i64* %base, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}
323
; i64 atomicrmw umax (unsigned) on flat pointers: selects flat_atomic_umax_x2,
; with glc set only in the *_ret_* variants (old value consumed and stored).

; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile umax i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile umax i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile umax i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile umax i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) {
entry:
  %old = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile umax i64* %base, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile umax i64* %base, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}
403
; i64 atomicrmw min (signed) on flat pointers: selects flat_atomic_smin_x2,
; with glc set only in the *_ret_* variants (old value consumed and stored).

; GCN-LABEL: {{^}}atomic_min_i64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile min i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile min i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile min i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile min i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) {
entry:
  %old = atomicrmw volatile min i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile min i64* %out, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile min i64* %base, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile min i64* %base, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}
483
; i64 atomicrmw umin (unsigned) on flat pointers: selects flat_atomic_umin_x2,
; with glc set only in the *_ret_* variants (old value consumed and stored).

; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile umin i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile umin i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile umin i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile umin i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) {
entry:
  %old = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile umin i64* %base, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile umin i64* %base, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}
563
; i64 atomicrmw or on flat pointers: selects flat_atomic_or_x2, with
; glc set only in the *_ret_* variants (old value consumed and stored).

; GCN-LABEL: {{^}}atomic_or_i64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile or i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %addr = getelementptr i64, i64* %out, i64 4
  %old = atomicrmw volatile or i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile or i64* %addr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %addr = getelementptr i64, i64* %base, i64 4
  %old = atomicrmw volatile or i64* %addr, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) {
entry:
  %old = atomicrmw volatile or i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %old = atomicrmw volatile or i64* %out, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile or i64* %base, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %base = getelementptr i64, i64* %out, i64 %index
  %old = atomicrmw volatile or i64* %base, i64 %in seq_cst
  store i64 %old, i64* %out2
  ret void
}
643
; i64 atomicrmw xchg on flat pointers must select flat_atomic_swap_x2,
; with glc (return-data) set only in the *_ret_* variants where the old
; value is consumed.  Note: the ret_addr64 check line previously had a
; stray double space after the captured operand; normalized to match the
; sibling check lines (FileCheck canonicalizes whitespace, so behavior is
; unchanged).

; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
723
724; GCN-LABEL: {{^}}atomic_xor_i64_offset:
725; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
726define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) {
727entry:
728  %gep = getelementptr i64, i64* %out, i64 4
729  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
730  ret void
731}
732
733; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
734; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
735; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
736define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
737entry:
738  %gep = getelementptr i64, i64* %out, i64 4
739  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
740  store i64 %tmp0, i64* %out2
741  ret void
742}
743
744; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
745; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
746define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
747entry:
748  %ptr = getelementptr i64, i64* %out, i64 %index
749  %gep = getelementptr i64, i64* %ptr, i64 4
750  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
751  ret void
752}
753
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Variable index plus constant offset with a used result: glc form, then the
; captured registers stored.
define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
765
; GCN-LABEL: {{^}}atomic_xor_i64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; Base-pointer case, result unused: non-returning (no glc) form expected.
define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  ret void
}
773
; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Base-pointer case with a used result: glc form, captured registers stored.
define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
783
; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; Variable index, result unused: non-returning (no glc) form expected.
define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  ret void
}
792
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Variable index with a used result: glc form, captured registers stored.
define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
803
; GCN-LABEL: {{^}}atomic_load_i64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; A seq_cst atomic load lowers to flat_load_dwordx2 with glc; the loaded pair
; must feed the following store. (Second register index uses [0-9]+ so the
; capture also matches double-digit VGPR numbers.)
define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %in, i64 4
  %val = load atomic i64, i64* %gep  seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
814
; GCN-LABEL: {{^}}atomic_load_i64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Base-pointer seq_cst atomic load: flat_load_dwordx2 glc, result stored.
; (Second register index uses [0-9]+ so double-digit VGPR numbers match.)
define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) {
entry:
  %val = load atomic i64, i64* %in seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
824
; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Variable index plus constant offset: seq_cst atomic load still emits
; flat_load_dwordx2 glc; the loaded pair must feed the store.
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
836
; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Variable index: seq_cst atomic load emits flat_load_dwordx2 glc; the loaded
; pair must feed the store.
define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %val = load atomic i64, i64* %ptr seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
847
; GCN-LABEL: {{^}}atomic_store_i64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; A seq_cst atomic store at a constant GEP offset lowers to a plain
; flat_store_dwordx2. (Second register index uses [0-9]+ so double-digit
; VGPR numbers match.)
define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  store atomic i64 %in, i64* %gep  seq_cst, align 8
  ret void
}
856
; GCN-LABEL: {{^}}atomic_store_i64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
; Base-pointer seq_cst atomic store lowers to flat_store_dwordx2.
; (Second register index uses [0-9]+ so double-digit VGPR numbers match.)
define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) {
entry:
  store atomic i64 %in, i64* %out seq_cst, align 8
  ret void
}
864
; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; Variable index plus constant offset: seq_cst atomic store still emits a
; plain flat_store_dwordx2 (the checked instruction carries no offset operand).
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}
874
; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; Variable index: seq_cst atomic store emits flat_store_dwordx2.
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  store atomic i64 %in, i64* %ptr seq_cst, align 8
  ret void
}
883
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; 64-bit cmpxchg at a constant GEP offset; result unused, so the
; non-returning form (no glc) is expected.
define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
892
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; Large GEP index (9000 i64 elements): the offset is folded into the address
; computation — the checked instruction carries no offset operand.
define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 9000
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
901
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; The loaded value of the cmpxchg pair is used, so the glc form is expected;
; only the low register of the result pair is captured and re-checked at the
; store.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
913
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; Variable index plus constant offset; result unused, so no glc expected.
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
923
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; Variable index plus constant offset with a used result: glc form; the low
; register of the result pair is captured and re-checked at the store.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
936
; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; Base-pointer cmpxchg, result unused: non-returning (no glc) form expected.
define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
944
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; Base-pointer cmpxchg with a used result: glc form; the low register of the
; result pair is captured and re-checked at the store.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
955
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; Variable index, result unused: non-returning (no glc) form expected.
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
964
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; Variable index with a used result: glc form; the low register of the result
; pair is captured and re-checked at the store.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
976