1; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SICIVI,GFX89 %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s
4
5; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
6; SICIVI: s_mov_b32 m0
7; GFX9-NOT: m0
8
9; GCN: ds_wrxchg_rtn_b64
10; GCN: s_endpgm
; Exchange the constant 4 into the LDS slot and store the previous value to %out.
define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
16
17; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
18; SICIVI: s_mov_b32 m0
19; GFX9-NOT: m0
20
21; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
22; GCN: s_endpgm
; Same as above, but through a gep of 4 i64 elements (32 bytes), which should
; fold into the DS instruction's immediate offset field.
define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
29
30; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
31; SICIVI: s_mov_b32 m0
32; GFX9-NOT: m0
33
34; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
35; GCN: s_endpgm
; Double (f64) exchange uses the same 64-bit DS exchange instruction; the
; 32-byte gep should still fold into the offset field.
define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %out, double addrspace(3)* %ptr) nounwind {
  %gep = getelementptr double, double addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg double addrspace(3)* %gep, double 4.0 seq_cst
  store double %result, double addrspace(1)* %out, align 8
  ret void
}
42
43; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
44; SICIVI: s_mov_b32 m0
45; GFX9-NOT: m0
46
47; GCN: ds_add_rtn_u64
48; GCN: s_endpgm
; Atomic add of 4 to an LDS i64; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
54
55; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
56; SICIVI-DAG: s_mov_b32 m0
57; GFX9-NOT: m0
58
59; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
60; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
61; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
62; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
63; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
64; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
65; GCN: buffer_store_dwordx2 [[RESULT]],
66; GCN: s_endpgm
; Atomic add of 9 at a folded +32-byte offset; this variant also checks the
; exact materialization of the 64-bit operand (9 in the low half, 0 high).
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
73
74; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
75; SICIVI-DAG: s_mov_b32 m0
76; GFX9-NOT: m0
77
78; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
79; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
80; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
81; GCN: buffer_store_dwordx2 [[RESULT]],
82; GCN: s_endpgm
; Atomic add of the constant 1; checks the 64-bit immediate 1 is built as a
; {1, 0} register pair for the add.
define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
88
89; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
90; SICIVI: s_mov_b32 m0
91; GFX9-NOT: m0
92
93; GCN: ds_add_rtn_u64 {{.*}} offset:32
94; GCN: s_endpgm
; Add-one variant with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
101
102; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
103; SICIVI: s_mov_b32 m0
104; GFX9-NOT: m0
105
106; GCN: ds_sub_rtn_u64
107; GCN: s_endpgm
; Atomic sub of 4 from an LDS i64; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
113
114; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
115; SICIVI: s_mov_b32 m0
116; GFX9-NOT: m0
117
118; GCN: ds_sub_rtn_u64 {{.*}} offset:32
119; GCN: s_endpgm
; Atomic sub with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
126
127; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
128; SICIVI-DAG: s_mov_b32 m0
129; GFX9-NOT: m0
130
131; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
132; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
133; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
134; GCN: buffer_store_dwordx2 [[RESULT]],
135; GCN: s_endpgm
; Atomic sub of the constant 1; checks the {1, 0} register-pair operand.
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
141
142; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
143; SICIVI: s_mov_b32 m0
144; GFX9-NOT: m0
145
146; GCN: ds_sub_rtn_u64 {{.*}} offset:32
147; GCN: s_endpgm
; Sub-one variant with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
154
155; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
156; SICIVI: s_mov_b32 m0
157; GFX9-NOT: m0
158
159; GCN: ds_and_rtn_b64
160; GCN: s_endpgm
; Atomic bitwise-and with 4; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
166
167; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
168; SICIVI: s_mov_b32 m0
169; GFX9-NOT: m0
170
171; GCN: ds_and_rtn_b64 {{.*}} offset:32
172; GCN: s_endpgm
; Atomic and with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
179
180; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
181; SICIVI: s_mov_b32 m0
182; GFX9-NOT: m0
183
184; GCN: ds_or_rtn_b64
185; GCN: s_endpgm
; Atomic bitwise-or with 4; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
191
192; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
193; SICIVI: s_mov_b32 m0
194; GFX9-NOT: m0
195
196; GCN: ds_or_rtn_b64 {{.*}} offset:32
197; GCN: s_endpgm
; Atomic or with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
204
205; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
206; SICIVI: s_mov_b32 m0
207; GFX9-NOT: m0
208
209; GCN: ds_xor_rtn_b64
210; GCN: s_endpgm
; Atomic bitwise-xor with 4; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
216
217; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
218; SICIVI: s_mov_b32 m0
219; GFX9-NOT: m0
220
221; GCN: ds_xor_rtn_b64 {{.*}} offset:32
222; GCN: s_endpgm
; Atomic xor with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
229
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
234;   store i64 %result, i64 addrspace(1)* %out, align 8
235;   ret void
236; }
237
238; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
239; SICIVI: s_mov_b32 m0
240; GFX9-NOT: m0
241
242; GCN: ds_min_rtn_i64
243; GCN: s_endpgm
; Signed atomic min with 4; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
249
250; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
251; SICIVI: s_mov_b32 m0
252; GFX9-NOT: m0
253
254; GCN: ds_min_rtn_i64 {{.*}} offset:32
255; GCN: s_endpgm
; Signed min with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
262
263; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
264; SICIVI: s_mov_b32 m0
265; GFX9-NOT: m0
266
267; GCN: ds_max_rtn_i64
268; GCN: s_endpgm
; Signed atomic max with 4; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
274
275; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
276; SICIVI: s_mov_b32 m0
277; GFX9-NOT: m0
278
279; GCN: ds_max_rtn_i64 {{.*}} offset:32
280; GCN: s_endpgm
; Signed max with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
287
288; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
289; SICIVI: s_mov_b32 m0
290; GFX9-NOT: m0
291
292; GCN: ds_min_rtn_u64
293; GCN: s_endpgm
; Unsigned atomic min with 4; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
299
300; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
301; SICIVI: s_mov_b32 m0
302; GFX9-NOT: m0
303
304; GCN: ds_min_rtn_u64 {{.*}} offset:32
305; GCN: s_endpgm
; Unsigned min with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
312
313; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
314; SICIVI: s_mov_b32 m0
315; GFX9-NOT: m0
316
317; GCN: ds_max_rtn_u64
318; GCN: s_endpgm
; Unsigned atomic max with 4; the old value is stored to %out.
define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
324
325; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
326; SICIVI: s_mov_b32 m0
327; GFX9-NOT: m0
328
329; GCN: ds_max_rtn_u64 {{.*}} offset:32
330; GCN: s_endpgm
; Unsigned max with the 32-byte gep folded into the DS offset field.
define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}
337
338; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
339; SICIVI: s_mov_b32 m0
340; GFX9-NOT: m0
341
342; GCN: ds_wrxchg_rtn_b64
343; GCN: s_endpgm
; Exchange whose result is intentionally dead; per the checks above, xchg still
; selects the returning DS form even when the value is unused.
define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
348
349; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
350; SICIVI: s_mov_b32 m0
351; GFX9-NOT: m0
352
353; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
354; GCN: s_endpgm
; Dead-result exchange with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
360
361; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
362; SICIVI: s_mov_b32 m0
363; GFX9-NOT: m0
364
365; GCN: ds_add_u64
366; GCN: s_endpgm
; Dead-result add; should select the non-returning DS add form.
define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
371
372; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
373; SICIVI-DAG: s_mov_b32 m0
374; GFX9-NOT: m0
375
376; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
377; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
378; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
379; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
380; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
381; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
382; GCN: s_endpgm
; Dead-result add of 9 at a folded +32-byte offset; this variant also checks
; operand materialization (9 low half, 0 high) and the pointer load.
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  ret void
}
388
389; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
390; SICIVI-DAG: s_mov_b32 m0
391; GFX9-NOT: m0
392
393; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
394; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
395; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
396; GCN: s_endpgm
; Dead-result add of 1; checks the {1, 0} register-pair operand.
define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}
401
402; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
403; SICIVI: s_mov_b32 m0
404; GFX9-NOT: m0
405
406; GCN: ds_add_u64 {{.*}} offset:32
407; GCN: s_endpgm
; Dead-result add of 1 with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}
413
414; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
415; SICIVI: s_mov_b32 m0
416; GFX9-NOT: m0
417
418; GCN: ds_sub_u64
419; GCN: s_endpgm
; Dead-result sub; should select the non-returning DS sub form.
define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
424
425; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
426; SICIVI: s_mov_b32 m0
427; GFX9-NOT: m0
428
429; GCN: ds_sub_u64 {{.*}} offset:32
430; GCN: s_endpgm
; Dead-result sub with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
436
437; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
438; SICIVI-DAG: s_mov_b32 m0
439; GFX9-NOT: m0
440
441; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
442; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
443; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
444; GCN: s_endpgm
; Dead-result sub of 1; checks the {1, 0} register-pair operand.
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}
449
450; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
451; SICIVI: s_mov_b32 m0
452; GFX9-NOT: m0
453
454; GCN: ds_sub_u64 {{.*}} offset:32
455; GCN: s_endpgm
; Dead-result sub of 1 with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}
461
462; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
463; SICIVI: s_mov_b32 m0
464; GFX9-NOT: m0
465
466; GCN: ds_and_b64
467; GCN: s_endpgm
; Dead-result and; should select the non-returning DS and form.
define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
472
473; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
474; SICIVI: s_mov_b32 m0
475; GFX9-NOT: m0
476
477; GCN: ds_and_b64 {{.*}} offset:32
478; GCN: s_endpgm
; Dead-result and with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
484
485; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
486; SICIVI: s_mov_b32 m0
487; GFX9-NOT: m0
488
489; GCN: ds_or_b64
490; GCN: s_endpgm
; Dead-result or; should select the non-returning DS or form.
define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
495
496; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
497; SICIVI: s_mov_b32 m0
498; GFX9-NOT: m0
499
500; GCN: ds_or_b64 {{.*}} offset:32
501; GCN: s_endpgm
; Dead-result or with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
507
508; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
509; SICIVI: s_mov_b32 m0
510; GFX9-NOT: m0
511
512; GCN: ds_xor_b64
513; GCN: s_endpgm
; Dead-result xor; should select the non-returning DS xor form.
define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
518
519; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
520; SICIVI: s_mov_b32 m0
521; GFX9-NOT: m0
522
523; GCN: ds_xor_b64 {{.*}} offset:32
524; GCN: s_endpgm
; Dead-result xor with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
530
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
535;   ret void
536; }
537
538; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
539; SICIVI: s_mov_b32 m0
540; GFX9-NOT: m0
541
542; GCN: ds_min_i64
543; GCN: s_endpgm
; Dead-result signed min; should select the non-returning DS min form.
define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
548
549; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
550; SICIVI: s_mov_b32 m0
551; GFX9-NOT: m0
552
553; GCN: ds_min_i64 {{.*}} offset:32
554; GCN: s_endpgm
; Dead-result signed min with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
560
561; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
562; SICIVI: s_mov_b32 m0
563; GFX9-NOT: m0
564
565; GCN: ds_max_i64
566; GCN: s_endpgm
; Dead-result signed max; should select the non-returning DS max form.
define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
571
572; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
573; SICIVI: s_mov_b32 m0
574; GFX9-NOT: m0
575
576; GCN: ds_max_i64 {{.*}} offset:32
577; GCN: s_endpgm
; Dead-result signed max with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
583
584; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
585; SICIVI: s_mov_b32 m0
586; GFX9-NOT: m0
587
588; GCN: ds_min_u64
589; GCN: s_endpgm
; Dead-result unsigned min; should select the non-returning DS min form.
define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
594
595; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
596; SICIVI: s_mov_b32 m0
597; GFX9-NOT: m0
598
599; GCN: ds_min_u64 {{.*}} offset:32
600; GCN: s_endpgm
; Dead-result unsigned min with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
606
607; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
608; SICIVI: s_mov_b32 m0
609; GFX9-NOT: m0
610
611; GCN: ds_max_u64
612; GCN: s_endpgm
; Dead-result unsigned max; should select the non-returning DS max form.
define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}
617
618; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
619; SICIVI: s_mov_b32 m0
620; GFX9-NOT: m0
621
622; GCN: ds_max_u64 {{.*}} offset:32
623; GCN: s_endpgm
; Dead-result unsigned max with a folded +32-byte offset.
define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}
629