1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A
3
4declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1)
5declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
6declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
7declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
8declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
9declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
10declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)
11declare double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
12declare double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
13declare double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
14declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data)
15declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data)
16declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
17declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1)
18
; f64 fadd via the legacy buffer intrinsic; the result is unused, yet selection
; still emits the returning (glc) form of buffer_atomic_add_f64.
define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: buffer_atomic_add_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  ; offset 0, slc 0; %ret is deliberately dead.
  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  ret void
}
34
; Returning form of the legacy buffer fadd: the store to an undef pointer keeps
; %ret alive so the glc result is actually consumed.
define amdgpu_ps void @buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: buffer_atomic_add_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  store double %ret, double* undef
  ret void
}
47
; Legacy buffer fadd with a non-zero immediate offset (4) and the slc bit set;
; result is stored to %out via global_store_dwordx2.
define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: buffer_atomic_add_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  ; offset 4, slc = 1 (the legacy intrinsic's i1 cache-policy flag).
  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}
68
; raw.buffer.atomic.fadd.f64 with the result unused; selects the offen (raw)
; addressing form, still with glc.
define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}
84
; Returning raw buffer fadd; storing %ret through an undef pointer keeps the
; returned value live.
define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}
97
; Raw buffer fadd with immediate offset 4 and cachepolicy 2 (slc); checks the
; "4 offen glc slc" encoding and the store of the result to %out.
define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  ; soffset/immoffset 4, cachepolicy 2 -> slc.
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}
118
; struct.buffer.atomic.fadd.f64 with the result unused; the struct form selects
; idxen addressing instead of offen.
define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_add_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  ret void
}
134
; Returning struct buffer fadd (idxen); result kept live via store to undef.
define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}
147
; Struct buffer fadd with voffset 4 and cachepolicy 2: the immediate offset is
; folded into "offset:4" on the idxen instruction, with slc set.
define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}
168
; raw.buffer.atomic.fmin.f64 with the result unused; selects
; buffer_atomic_min_f64 in the offen form.
define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}
184
; Returning raw buffer fmin; result kept live via store to undef.
define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}
197
; Raw buffer fmin with offset 4 and cachepolicy 2 (slc); result stored to %out.
define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}
218
; struct.buffer.atomic.fmin.f64 with the result unused; idxen addressing.
define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_min_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  ret void
}
234
; Returning struct buffer fmin (idxen); result kept live via store to undef.
define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}
247
; Struct buffer fmin with voffset 4 folded to "offset:4" and slc set.
define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}
268
; raw.buffer.atomic.fmax.f64 with the result unused; selects
; buffer_atomic_max_f64 in the offen form.
define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}
284
; Returning raw buffer fmax; result kept live via store to undef.
define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}
297
; Raw buffer fmax with offset 4 and cachepolicy 2 (slc); result stored to %out.
define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}
318
; struct.buffer.atomic.fmax.f64 with the result unused; idxen addressing.
define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_max_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  ret void
}
334
; Returning struct buffer fmax (idxen); result kept live via store to undef.
define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}
347
; Struct buffer fmax with voffset 4 folded to "offset:4" and slc set.
define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}
368
; global.atomic.fadd.f64 on a global pointer with the result unused; note the
; returning (glc) form is still selected even though %ret is dead.
define amdgpu_kernel void @global_atomic_fadd_f64_noret(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fadd_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret void
}
382
; global.atomic.fmin.f64 with the result unused; selects global_atomic_min_f64.
define amdgpu_kernel void @global_atomic_fmin_f64_noret(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmin_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret void
}
396
; global.atomic.fmax.f64 with the result unused; selects global_atomic_max_f64.
define amdgpu_kernel void @global_atomic_fmax_f64_noret(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmax_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret void
}
410
; Returning global fadd from a non-kernel function: pointer and data arrive in
; v[0:1]/v[2:3] per the default calling convention, result returned in v[0:1].
define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret double %ret
}
422
; Returning global fmax from a non-kernel function; result returned in v[0:1].
define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmax_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret double %ret
}
434
; Returning global fmin from a non-kernel function; result returned in v[0:1].
define double @global_atomic_fmin_f64_rtn(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmin_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret double %ret
}
446
; flat.atomic.fadd.f64 on a flat (addrspace 0) pointer, result unused; selects
; flat_atomic_add_f64, still in the glc form.
define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data)
  ret void
}
460
; Returning flat fadd; the post-atomic wait covers both vmcnt and lgkmcnt since
; a flat access may hit either memory path.
define double @flat_atomic_fadd_f64_rtn(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data)
  ret double %ret
}
472
; flat.atomic.fmin.f64 with the result unused; selects flat_atomic_min_f64.
define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data)
  ret void
}
486
; Returning flat fmin; waits on both vmcnt and lgkmcnt after the atomic.
define double @flat_atomic_fmin_f64_rtn(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmin_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data)
  ret double %ret
}
498
; flat.atomic.fmax.f64 with the result unused; selects flat_atomic_max_f64.
define amdgpu_kernel void @flat_atomic_fmax_f64_noret(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmax_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
  ret void
}
512
; Returning flat fmax; waits on both vmcnt and lgkmcnt after the atomic.
define double @flat_atomic_fmax_f64_rtn(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmax_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
  ret double %ret
}
524
; llvm.amdgcn.ds.fadd.f64 on LDS with the result unused; still selects the
; ds_add_rtn_f64 (returning) form.
define amdgpu_kernel void @local_atomic_fadd_f64_noret(double addrspace(3)* %ptr, double %data) {
; GFX90A-LABEL: local_atomic_fadd_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, s4
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v2, v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  ; ordering/scope operands 0/0, isVolatile 0.
  %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
  ret void
}
539
; Returning LDS fadd from a non-kernel function; data is shuffled from v[1:2]
; into the aligned pair v[4:5] before the ds op.
define double @local_atomic_fadd_f64_rtn(double addrspace(3)* %ptr, double %data) {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[4:5]
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
  ret double %ret
}
553
; atomicrmw fadd on LDS (pattern-matched path, not the intrinsic) with constant
; operand 4.0 (0x40100000 high dword) and seq_cst ordering; result unused.
define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v2, v[0:1]
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
  ret void
}
570
; Returning atomicrmw fadd on LDS with constant 4.0 and seq_cst ordering;
; note %data is unused by the body (the addend is the constant).
define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
  ret double %ret
}
585