1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
4
; Image atomic swap on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the eight resource dwords to be copied from s2-s9 into
; s[0:7], then one image_atomic_swap with the glc and a16 modifiers followed
; by s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
5define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
6; GFX9-LABEL: atomic_swap_i32_1d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    s_mov_b32 s0, s2
9; GFX9-NEXT:    s_mov_b32 s1, s3
10; GFX9-NEXT:    s_mov_b32 s2, s4
11; GFX9-NEXT:    s_mov_b32 s3, s5
12; GFX9-NEXT:    s_mov_b32 s4, s6
13; GFX9-NEXT:    s_mov_b32 s5, s7
14; GFX9-NEXT:    s_mov_b32 s6, s8
15; GFX9-NEXT:    s_mov_b32 s7, s9
16; GFX9-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc a16
17; GFX9-NEXT:    s_waitcnt vmcnt(0)
18; GFX9-NEXT:    ; return to shader part epilog
19;
20; GFX10-LABEL: atomic_swap_i32_1d:
21; GFX10:       ; %bb.0: ; %main_body
22; GFX10-NEXT:    s_mov_b32 s0, s2
23; GFX10-NEXT:    s_mov_b32 s1, s3
24; GFX10-NEXT:    s_mov_b32 s2, s4
25; GFX10-NEXT:    s_mov_b32 s3, s5
26; GFX10-NEXT:    s_mov_b32 s4, s6
27; GFX10-NEXT:    s_mov_b32 s5, s7
28; GFX10-NEXT:    s_mov_b32 s6, s8
29; GFX10-NEXT:    s_mov_b32 s7, s9
30; GFX10-NEXT:    ; implicit-def: $vcc_hi
31; GFX10-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
32; GFX10-NEXT:    s_waitcnt vmcnt(0)
33; GFX10-NEXT:    ; return to shader part epilog
34main_body:
35  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
36  %out = bitcast i32 %v to float
37  ret float %out
38}
39
; Image atomic add on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_add with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
40define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
41; GFX9-LABEL: atomic_add_i32_1d:
42; GFX9:       ; %bb.0: ; %main_body
43; GFX9-NEXT:    s_mov_b32 s0, s2
44; GFX9-NEXT:    s_mov_b32 s1, s3
45; GFX9-NEXT:    s_mov_b32 s2, s4
46; GFX9-NEXT:    s_mov_b32 s3, s5
47; GFX9-NEXT:    s_mov_b32 s4, s6
48; GFX9-NEXT:    s_mov_b32 s5, s7
49; GFX9-NEXT:    s_mov_b32 s6, s8
50; GFX9-NEXT:    s_mov_b32 s7, s9
51; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
52; GFX9-NEXT:    s_waitcnt vmcnt(0)
53; GFX9-NEXT:    ; return to shader part epilog
54;
55; GFX10-LABEL: atomic_add_i32_1d:
56; GFX10:       ; %bb.0: ; %main_body
57; GFX10-NEXT:    s_mov_b32 s0, s2
58; GFX10-NEXT:    s_mov_b32 s1, s3
59; GFX10-NEXT:    s_mov_b32 s2, s4
60; GFX10-NEXT:    s_mov_b32 s3, s5
61; GFX10-NEXT:    s_mov_b32 s4, s6
62; GFX10-NEXT:    s_mov_b32 s5, s7
63; GFX10-NEXT:    s_mov_b32 s6, s8
64; GFX10-NEXT:    s_mov_b32 s7, s9
65; GFX10-NEXT:    ; implicit-def: $vcc_hi
66; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
67; GFX10-NEXT:    s_waitcnt vmcnt(0)
68; GFX10-NEXT:    ; return to shader part epilog
69main_body:
70  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
71  %out = bitcast i32 %v to float
72  ret float %out
73}
74
; Image atomic sub on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_sub with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
75define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
76; GFX9-LABEL: atomic_sub_i32_1d:
77; GFX9:       ; %bb.0: ; %main_body
78; GFX9-NEXT:    s_mov_b32 s0, s2
79; GFX9-NEXT:    s_mov_b32 s1, s3
80; GFX9-NEXT:    s_mov_b32 s2, s4
81; GFX9-NEXT:    s_mov_b32 s3, s5
82; GFX9-NEXT:    s_mov_b32 s4, s6
83; GFX9-NEXT:    s_mov_b32 s5, s7
84; GFX9-NEXT:    s_mov_b32 s6, s8
85; GFX9-NEXT:    s_mov_b32 s7, s9
86; GFX9-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc a16
87; GFX9-NEXT:    s_waitcnt vmcnt(0)
88; GFX9-NEXT:    ; return to shader part epilog
89;
90; GFX10-LABEL: atomic_sub_i32_1d:
91; GFX10:       ; %bb.0: ; %main_body
92; GFX10-NEXT:    s_mov_b32 s0, s2
93; GFX10-NEXT:    s_mov_b32 s1, s3
94; GFX10-NEXT:    s_mov_b32 s2, s4
95; GFX10-NEXT:    s_mov_b32 s3, s5
96; GFX10-NEXT:    s_mov_b32 s4, s6
97; GFX10-NEXT:    s_mov_b32 s5, s7
98; GFX10-NEXT:    s_mov_b32 s6, s8
99; GFX10-NEXT:    s_mov_b32 s7, s9
100; GFX10-NEXT:    ; implicit-def: $vcc_hi
101; GFX10-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
102; GFX10-NEXT:    s_waitcnt vmcnt(0)
103; GFX10-NEXT:    ; return to shader part epilog
104main_body:
105  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
106  %out = bitcast i32 %v to float
107  ret float %out
108}
109
; Image atomic smin on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_smin with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
110define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
111; GFX9-LABEL: atomic_smin_i32_1d:
112; GFX9:       ; %bb.0: ; %main_body
113; GFX9-NEXT:    s_mov_b32 s0, s2
114; GFX9-NEXT:    s_mov_b32 s1, s3
115; GFX9-NEXT:    s_mov_b32 s2, s4
116; GFX9-NEXT:    s_mov_b32 s3, s5
117; GFX9-NEXT:    s_mov_b32 s4, s6
118; GFX9-NEXT:    s_mov_b32 s5, s7
119; GFX9-NEXT:    s_mov_b32 s6, s8
120; GFX9-NEXT:    s_mov_b32 s7, s9
121; GFX9-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc a16
122; GFX9-NEXT:    s_waitcnt vmcnt(0)
123; GFX9-NEXT:    ; return to shader part epilog
124;
125; GFX10-LABEL: atomic_smin_i32_1d:
126; GFX10:       ; %bb.0: ; %main_body
127; GFX10-NEXT:    s_mov_b32 s0, s2
128; GFX10-NEXT:    s_mov_b32 s1, s3
129; GFX10-NEXT:    s_mov_b32 s2, s4
130; GFX10-NEXT:    s_mov_b32 s3, s5
131; GFX10-NEXT:    s_mov_b32 s4, s6
132; GFX10-NEXT:    s_mov_b32 s5, s7
133; GFX10-NEXT:    s_mov_b32 s6, s8
134; GFX10-NEXT:    s_mov_b32 s7, s9
135; GFX10-NEXT:    ; implicit-def: $vcc_hi
136; GFX10-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
137; GFX10-NEXT:    s_waitcnt vmcnt(0)
138; GFX10-NEXT:    ; return to shader part epilog
139main_body:
140  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
141  %out = bitcast i32 %v to float
142  ret float %out
143}
144
; Image atomic umin on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_umin with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
145define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
146; GFX9-LABEL: atomic_umin_i32_1d:
147; GFX9:       ; %bb.0: ; %main_body
148; GFX9-NEXT:    s_mov_b32 s0, s2
149; GFX9-NEXT:    s_mov_b32 s1, s3
150; GFX9-NEXT:    s_mov_b32 s2, s4
151; GFX9-NEXT:    s_mov_b32 s3, s5
152; GFX9-NEXT:    s_mov_b32 s4, s6
153; GFX9-NEXT:    s_mov_b32 s5, s7
154; GFX9-NEXT:    s_mov_b32 s6, s8
155; GFX9-NEXT:    s_mov_b32 s7, s9
156; GFX9-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc a16
157; GFX9-NEXT:    s_waitcnt vmcnt(0)
158; GFX9-NEXT:    ; return to shader part epilog
159;
160; GFX10-LABEL: atomic_umin_i32_1d:
161; GFX10:       ; %bb.0: ; %main_body
162; GFX10-NEXT:    s_mov_b32 s0, s2
163; GFX10-NEXT:    s_mov_b32 s1, s3
164; GFX10-NEXT:    s_mov_b32 s2, s4
165; GFX10-NEXT:    s_mov_b32 s3, s5
166; GFX10-NEXT:    s_mov_b32 s4, s6
167; GFX10-NEXT:    s_mov_b32 s5, s7
168; GFX10-NEXT:    s_mov_b32 s6, s8
169; GFX10-NEXT:    s_mov_b32 s7, s9
170; GFX10-NEXT:    ; implicit-def: $vcc_hi
171; GFX10-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
172; GFX10-NEXT:    s_waitcnt vmcnt(0)
173; GFX10-NEXT:    ; return to shader part epilog
174main_body:
175  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
176  %out = bitcast i32 %v to float
177  ret float %out
178}
179
; Image atomic smax on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_smax with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
180define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
181; GFX9-LABEL: atomic_smax_i32_1d:
182; GFX9:       ; %bb.0: ; %main_body
183; GFX9-NEXT:    s_mov_b32 s0, s2
184; GFX9-NEXT:    s_mov_b32 s1, s3
185; GFX9-NEXT:    s_mov_b32 s2, s4
186; GFX9-NEXT:    s_mov_b32 s3, s5
187; GFX9-NEXT:    s_mov_b32 s4, s6
188; GFX9-NEXT:    s_mov_b32 s5, s7
189; GFX9-NEXT:    s_mov_b32 s6, s8
190; GFX9-NEXT:    s_mov_b32 s7, s9
191; GFX9-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc a16
192; GFX9-NEXT:    s_waitcnt vmcnt(0)
193; GFX9-NEXT:    ; return to shader part epilog
194;
195; GFX10-LABEL: atomic_smax_i32_1d:
196; GFX10:       ; %bb.0: ; %main_body
197; GFX10-NEXT:    s_mov_b32 s0, s2
198; GFX10-NEXT:    s_mov_b32 s1, s3
199; GFX10-NEXT:    s_mov_b32 s2, s4
200; GFX10-NEXT:    s_mov_b32 s3, s5
201; GFX10-NEXT:    s_mov_b32 s4, s6
202; GFX10-NEXT:    s_mov_b32 s5, s7
203; GFX10-NEXT:    s_mov_b32 s6, s8
204; GFX10-NEXT:    s_mov_b32 s7, s9
205; GFX10-NEXT:    ; implicit-def: $vcc_hi
206; GFX10-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
207; GFX10-NEXT:    s_waitcnt vmcnt(0)
208; GFX10-NEXT:    ; return to shader part epilog
209main_body:
210  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
211  %out = bitcast i32 %v to float
212  ret float %out
213}
214
; Image atomic umax on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_umax with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
215define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
216; GFX9-LABEL: atomic_umax_i32_1d:
217; GFX9:       ; %bb.0: ; %main_body
218; GFX9-NEXT:    s_mov_b32 s0, s2
219; GFX9-NEXT:    s_mov_b32 s1, s3
220; GFX9-NEXT:    s_mov_b32 s2, s4
221; GFX9-NEXT:    s_mov_b32 s3, s5
222; GFX9-NEXT:    s_mov_b32 s4, s6
223; GFX9-NEXT:    s_mov_b32 s5, s7
224; GFX9-NEXT:    s_mov_b32 s6, s8
225; GFX9-NEXT:    s_mov_b32 s7, s9
226; GFX9-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc a16
227; GFX9-NEXT:    s_waitcnt vmcnt(0)
228; GFX9-NEXT:    ; return to shader part epilog
229;
230; GFX10-LABEL: atomic_umax_i32_1d:
231; GFX10:       ; %bb.0: ; %main_body
232; GFX10-NEXT:    s_mov_b32 s0, s2
233; GFX10-NEXT:    s_mov_b32 s1, s3
234; GFX10-NEXT:    s_mov_b32 s2, s4
235; GFX10-NEXT:    s_mov_b32 s3, s5
236; GFX10-NEXT:    s_mov_b32 s4, s6
237; GFX10-NEXT:    s_mov_b32 s5, s7
238; GFX10-NEXT:    s_mov_b32 s6, s8
239; GFX10-NEXT:    s_mov_b32 s7, s9
240; GFX10-NEXT:    ; implicit-def: $vcc_hi
241; GFX10-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
242; GFX10-NEXT:    s_waitcnt vmcnt(0)
243; GFX10-NEXT:    ; return to shader part epilog
244main_body:
245  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
246  %out = bitcast i32 %v to float
247  ret float %out
248}
249
; Image atomic and on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_and with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
; The function name follows the atomic_<op>_i32_1d pattern used by the other
; 1D tests in this file; the -LABEL lines below must match it exactly.
250define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
251; GFX9-LABEL: atomic_and_i32_1d:
252; GFX9:       ; %bb.0: ; %main_body
253; GFX9-NEXT:    s_mov_b32 s0, s2
254; GFX9-NEXT:    s_mov_b32 s1, s3
255; GFX9-NEXT:    s_mov_b32 s2, s4
256; GFX9-NEXT:    s_mov_b32 s3, s5
257; GFX9-NEXT:    s_mov_b32 s4, s6
258; GFX9-NEXT:    s_mov_b32 s5, s7
259; GFX9-NEXT:    s_mov_b32 s6, s8
260; GFX9-NEXT:    s_mov_b32 s7, s9
261; GFX9-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc a16
262; GFX9-NEXT:    s_waitcnt vmcnt(0)
263; GFX9-NEXT:    ; return to shader part epilog
264;
265; GFX10-LABEL: atomic_and_i32_1d:
266; GFX10:       ; %bb.0: ; %main_body
267; GFX10-NEXT:    s_mov_b32 s0, s2
268; GFX10-NEXT:    s_mov_b32 s1, s3
269; GFX10-NEXT:    s_mov_b32 s2, s4
270; GFX10-NEXT:    s_mov_b32 s3, s5
271; GFX10-NEXT:    s_mov_b32 s4, s6
272; GFX10-NEXT:    s_mov_b32 s5, s7
273; GFX10-NEXT:    s_mov_b32 s6, s8
274; GFX10-NEXT:    s_mov_b32 s7, s9
275; GFX10-NEXT:    ; implicit-def: $vcc_hi
276; GFX10-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
277; GFX10-NEXT:    s_waitcnt vmcnt(0)
278; GFX10-NEXT:    ; return to shader part epilog
279main_body:
280  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
281  %out = bitcast i32 %v to float
282  ret float %out
283}
284
; Image atomic or on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_or with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
285define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
286; GFX9-LABEL: atomic_or_i32_1d:
287; GFX9:       ; %bb.0: ; %main_body
288; GFX9-NEXT:    s_mov_b32 s0, s2
289; GFX9-NEXT:    s_mov_b32 s1, s3
290; GFX9-NEXT:    s_mov_b32 s2, s4
291; GFX9-NEXT:    s_mov_b32 s3, s5
292; GFX9-NEXT:    s_mov_b32 s4, s6
293; GFX9-NEXT:    s_mov_b32 s5, s7
294; GFX9-NEXT:    s_mov_b32 s6, s8
295; GFX9-NEXT:    s_mov_b32 s7, s9
296; GFX9-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc a16
297; GFX9-NEXT:    s_waitcnt vmcnt(0)
298; GFX9-NEXT:    ; return to shader part epilog
299;
300; GFX10-LABEL: atomic_or_i32_1d:
301; GFX10:       ; %bb.0: ; %main_body
302; GFX10-NEXT:    s_mov_b32 s0, s2
303; GFX10-NEXT:    s_mov_b32 s1, s3
304; GFX10-NEXT:    s_mov_b32 s2, s4
305; GFX10-NEXT:    s_mov_b32 s3, s5
306; GFX10-NEXT:    s_mov_b32 s4, s6
307; GFX10-NEXT:    s_mov_b32 s5, s7
308; GFX10-NEXT:    s_mov_b32 s6, s8
309; GFX10-NEXT:    s_mov_b32 s7, s9
310; GFX10-NEXT:    ; implicit-def: $vcc_hi
311; GFX10-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
312; GFX10-NEXT:    s_waitcnt vmcnt(0)
313; GFX10-NEXT:    ; return to shader part epilog
314main_body:
315  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
316  %out = bitcast i32 %v to float
317  ret float %out
318}
319
; Image atomic xor on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_xor with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
320define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
321; GFX9-LABEL: atomic_xor_i32_1d:
322; GFX9:       ; %bb.0: ; %main_body
323; GFX9-NEXT:    s_mov_b32 s0, s2
324; GFX9-NEXT:    s_mov_b32 s1, s3
325; GFX9-NEXT:    s_mov_b32 s2, s4
326; GFX9-NEXT:    s_mov_b32 s3, s5
327; GFX9-NEXT:    s_mov_b32 s4, s6
328; GFX9-NEXT:    s_mov_b32 s5, s7
329; GFX9-NEXT:    s_mov_b32 s6, s8
330; GFX9-NEXT:    s_mov_b32 s7, s9
331; GFX9-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc a16
332; GFX9-NEXT:    s_waitcnt vmcnt(0)
333; GFX9-NEXT:    ; return to shader part epilog
334;
335; GFX10-LABEL: atomic_xor_i32_1d:
336; GFX10:       ; %bb.0: ; %main_body
337; GFX10-NEXT:    s_mov_b32 s0, s2
338; GFX10-NEXT:    s_mov_b32 s1, s3
339; GFX10-NEXT:    s_mov_b32 s2, s4
340; GFX10-NEXT:    s_mov_b32 s3, s5
341; GFX10-NEXT:    s_mov_b32 s4, s6
342; GFX10-NEXT:    s_mov_b32 s5, s7
343; GFX10-NEXT:    s_mov_b32 s6, s8
344; GFX10-NEXT:    s_mov_b32 s7, s9
345; GFX10-NEXT:    ; implicit-def: $vcc_hi
346; GFX10-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
347; GFX10-NEXT:    s_waitcnt vmcnt(0)
348; GFX10-NEXT:    ; return to shader part epilog
349main_body:
350  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
351  %out = bitcast i32 %v to float
352  ret float %out
353}
354
; Image atomic inc on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_inc with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
355define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
356; GFX9-LABEL: atomic_inc_i32_1d:
357; GFX9:       ; %bb.0: ; %main_body
358; GFX9-NEXT:    s_mov_b32 s0, s2
359; GFX9-NEXT:    s_mov_b32 s1, s3
360; GFX9-NEXT:    s_mov_b32 s2, s4
361; GFX9-NEXT:    s_mov_b32 s3, s5
362; GFX9-NEXT:    s_mov_b32 s4, s6
363; GFX9-NEXT:    s_mov_b32 s5, s7
364; GFX9-NEXT:    s_mov_b32 s6, s8
365; GFX9-NEXT:    s_mov_b32 s7, s9
366; GFX9-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc a16
367; GFX9-NEXT:    s_waitcnt vmcnt(0)
368; GFX9-NEXT:    ; return to shader part epilog
369;
370; GFX10-LABEL: atomic_inc_i32_1d:
371; GFX10:       ; %bb.0: ; %main_body
372; GFX10-NEXT:    s_mov_b32 s0, s2
373; GFX10-NEXT:    s_mov_b32 s1, s3
374; GFX10-NEXT:    s_mov_b32 s2, s4
375; GFX10-NEXT:    s_mov_b32 s3, s5
376; GFX10-NEXT:    s_mov_b32 s4, s6
377; GFX10-NEXT:    s_mov_b32 s5, s7
378; GFX10-NEXT:    s_mov_b32 s6, s8
379; GFX10-NEXT:    s_mov_b32 s7, s9
380; GFX10-NEXT:    ; implicit-def: $vcc_hi
381; GFX10-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
382; GFX10-NEXT:    s_waitcnt vmcnt(0)
383; GFX10-NEXT:    ; return to shader part epilog
384main_body:
385  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
386  %out = bitcast i32 %v to float
387  ret float %out
388}
389
; Image atomic dec on a 1D image addressed by a single 16-bit coordinate (%s).
; The i32 value returned by the intrinsic is bitcast to float and returned.
; Both prefixes expect the resource dwords to be copied into s[0:7], then one
; image_atomic_dec with the glc and a16 modifiers followed by
; s_waitcnt vmcnt(0). GFX10 additionally expects dim:SQ_RSRC_IMG_1D.
390define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
391; GFX9-LABEL: atomic_dec_i32_1d:
392; GFX9:       ; %bb.0: ; %main_body
393; GFX9-NEXT:    s_mov_b32 s0, s2
394; GFX9-NEXT:    s_mov_b32 s1, s3
395; GFX9-NEXT:    s_mov_b32 s2, s4
396; GFX9-NEXT:    s_mov_b32 s3, s5
397; GFX9-NEXT:    s_mov_b32 s4, s6
398; GFX9-NEXT:    s_mov_b32 s5, s7
399; GFX9-NEXT:    s_mov_b32 s6, s8
400; GFX9-NEXT:    s_mov_b32 s7, s9
401; GFX9-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc a16
402; GFX9-NEXT:    s_waitcnt vmcnt(0)
403; GFX9-NEXT:    ; return to shader part epilog
404;
405; GFX10-LABEL: atomic_dec_i32_1d:
406; GFX10:       ; %bb.0: ; %main_body
407; GFX10-NEXT:    s_mov_b32 s0, s2
408; GFX10-NEXT:    s_mov_b32 s1, s3
409; GFX10-NEXT:    s_mov_b32 s2, s4
410; GFX10-NEXT:    s_mov_b32 s3, s5
411; GFX10-NEXT:    s_mov_b32 s4, s6
412; GFX10-NEXT:    s_mov_b32 s5, s7
413; GFX10-NEXT:    s_mov_b32 s6, s8
414; GFX10-NEXT:    s_mov_b32 s7, s9
415; GFX10-NEXT:    ; implicit-def: $vcc_hi
416; GFX10-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
417; GFX10-NEXT:    s_waitcnt vmcnt(0)
418; GFX10-NEXT:    ; return to shader part epilog
419main_body:
420  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
421  %out = bitcast i32 %v to float
422  ret float %out
423}
424
; Image atomic compare-and-swap on a 1D image addressed by a single 16-bit
; coordinate (%s). Unlike the other 1D tests it takes two i32 data operands
; (%cmp and %swap), so the expected instruction uses the register pair v[0:1]
; with dmask:0x3 for data and v2 for the address.
; The i32 value returned by the intrinsic is bitcast to float and returned.
425define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
426; GFX9-LABEL: atomic_cmpswap_i32_1d:
427; GFX9:       ; %bb.0: ; %main_body
428; GFX9-NEXT:    s_mov_b32 s0, s2
429; GFX9-NEXT:    s_mov_b32 s1, s3
430; GFX9-NEXT:    s_mov_b32 s2, s4
431; GFX9-NEXT:    s_mov_b32 s3, s5
432; GFX9-NEXT:    s_mov_b32 s4, s6
433; GFX9-NEXT:    s_mov_b32 s5, s7
434; GFX9-NEXT:    s_mov_b32 s6, s8
435; GFX9-NEXT:    s_mov_b32 s7, s9
436; GFX9-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
437; GFX9-NEXT:    s_waitcnt vmcnt(0)
438; GFX9-NEXT:    ; return to shader part epilog
439;
440; GFX10-LABEL: atomic_cmpswap_i32_1d:
441; GFX10:       ; %bb.0: ; %main_body
442; GFX10-NEXT:    s_mov_b32 s0, s2
443; GFX10-NEXT:    s_mov_b32 s1, s3
444; GFX10-NEXT:    s_mov_b32 s2, s4
445; GFX10-NEXT:    s_mov_b32 s3, s5
446; GFX10-NEXT:    s_mov_b32 s4, s6
447; GFX10-NEXT:    s_mov_b32 s5, s7
448; GFX10-NEXT:    s_mov_b32 s6, s8
449; GFX10-NEXT:    s_mov_b32 s7, s9
450; GFX10-NEXT:    ; implicit-def: $vcc_hi
451; GFX10-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
452; GFX10-NEXT:    s_waitcnt vmcnt(0)
453; GFX10-NEXT:    ; return to shader part epilog
454main_body:
455  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
456  %out = bitcast i32 %v to float
457  ret float %out
458}
459
; Image atomic add on a 2D image addressed by two 16-bit coordinates (%s, %t).
; The expected code packs both coordinates into the single address register
; v1: v2 is shifted left by 16 and combined by v_and_or_b32 with v1 masked by
; 0xffff. GFX9 materializes the mask in v3 first; GFX10 uses it as a literal.
; The i32 value returned by the intrinsic is bitcast to float and returned.
460define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
461; GFX9-LABEL: atomic_add_i32_2d:
462; GFX9:       ; %bb.0: ; %main_body
463; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
464; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
465; GFX9-NEXT:    s_mov_b32 s0, s2
466; GFX9-NEXT:    s_mov_b32 s1, s3
467; GFX9-NEXT:    s_mov_b32 s2, s4
468; GFX9-NEXT:    s_mov_b32 s3, s5
469; GFX9-NEXT:    s_mov_b32 s4, s6
470; GFX9-NEXT:    s_mov_b32 s5, s7
471; GFX9-NEXT:    s_mov_b32 s6, s8
472; GFX9-NEXT:    s_mov_b32 s7, s9
473; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
474; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
475; GFX9-NEXT:    s_waitcnt vmcnt(0)
476; GFX9-NEXT:    ; return to shader part epilog
477;
478; GFX10-LABEL: atomic_add_i32_2d:
479; GFX10:       ; %bb.0: ; %main_body
480; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
481; GFX10-NEXT:    s_mov_b32 s0, s2
482; GFX10-NEXT:    s_mov_b32 s1, s3
483; GFX10-NEXT:    s_mov_b32 s2, s4
484; GFX10-NEXT:    s_mov_b32 s3, s5
485; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
486; GFX10-NEXT:    s_mov_b32 s4, s6
487; GFX10-NEXT:    s_mov_b32 s5, s7
488; GFX10-NEXT:    s_mov_b32 s6, s8
489; GFX10-NEXT:    s_mov_b32 s7, s9
490; GFX10-NEXT:    ; implicit-def: $vcc_hi
491; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc a16
492; GFX10-NEXT:    s_waitcnt vmcnt(0)
493; GFX10-NEXT:    ; return to shader part epilog
494main_body:
495  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
496  %out = bitcast i32 %v to float
497  ret float %out
498}
499
; Image atomic add on a 3D image addressed by three 16-bit coordinates
; (%s, %t, %r). The expected code packs them into the address pair v[1:2]:
; v1 combines v1 (masked with 0xffff) and v2 (shifted left by 16); v2 combines
; v3 (masked with 0xffff) with s8, which is s0 shifted left by 16.
; NOTE(review): the high half of the second address dword has no matching
; fourth coordinate, so the s0-derived value is presumably undefined padding
; - confirm before relying on it.
; The i32 value returned by the intrinsic is bitcast to float and returned.
500define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
501; GFX9-LABEL: atomic_add_i32_3d:
502; GFX9:       ; %bb.0: ; %main_body
503; GFX9-NEXT:    s_mov_b32 s0, s2
504; GFX9-NEXT:    s_mov_b32 s2, s4
505; GFX9-NEXT:    s_mov_b32 s4, s6
506; GFX9-NEXT:    s_mov_b32 s6, s8
507; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
508; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
509; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
510; GFX9-NEXT:    s_mov_b32 s1, s3
511; GFX9-NEXT:    s_mov_b32 s3, s5
512; GFX9-NEXT:    s_mov_b32 s5, s7
513; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
514; GFX9-NEXT:    s_mov_b32 s7, s9
515; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
516; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
517; GFX9-NEXT:    s_waitcnt vmcnt(0)
518; GFX9-NEXT:    ; return to shader part epilog
519;
520; GFX10-LABEL: atomic_add_i32_3d:
521; GFX10:       ; %bb.0: ; %main_body
522; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
523; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
524; GFX10-NEXT:    s_mov_b32 s0, s2
525; GFX10-NEXT:    s_mov_b32 s2, s4
526; GFX10-NEXT:    s_mov_b32 s4, s6
527; GFX10-NEXT:    s_mov_b32 s6, s8
528; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
529; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
530; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
531; GFX10-NEXT:    s_mov_b32 s1, s3
532; GFX10-NEXT:    s_mov_b32 s3, s5
533; GFX10-NEXT:    s_mov_b32 s5, s7
534; GFX10-NEXT:    s_mov_b32 s7, s9
535; GFX10-NEXT:    ; implicit-def: $vcc_hi
536; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc a16
537; GFX10-NEXT:    s_waitcnt vmcnt(0)
538; GFX10-NEXT:    ; return to shader part epilog
539main_body:
540  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
541  %out = bitcast i32 %v to float
542  ret float %out
543}
544
; Image atomic add on a cube image addressed by three 16-bit values
; (%s, %t, %face). The expected code packs them into the address pair v[1:2]:
; v1 combines v1 (masked with 0xffff) and v2 (shifted left by 16); v2 combines
; v3 (masked with 0xffff) with s8, which is s0 shifted left by 16.
; NOTE(review): the high half of the second address dword has no matching
; fourth coordinate, so the s0-derived value is presumably undefined padding
; - confirm before relying on it.
; GFX9 expects the trailing `da` modifier; GFX10 expects dim:SQ_RSRC_IMG_CUBE.
; The i32 value returned by the intrinsic is bitcast to float and returned.
545define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
546; GFX9-LABEL: atomic_add_i32_cube:
547; GFX9:       ; %bb.0: ; %main_body
548; GFX9-NEXT:    s_mov_b32 s0, s2
549; GFX9-NEXT:    s_mov_b32 s2, s4
550; GFX9-NEXT:    s_mov_b32 s4, s6
551; GFX9-NEXT:    s_mov_b32 s6, s8
552; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
553; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
554; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
555; GFX9-NEXT:    s_mov_b32 s1, s3
556; GFX9-NEXT:    s_mov_b32 s3, s5
557; GFX9-NEXT:    s_mov_b32 s5, s7
558; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
559; GFX9-NEXT:    s_mov_b32 s7, s9
560; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
561; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
562; GFX9-NEXT:    s_waitcnt vmcnt(0)
563; GFX9-NEXT:    ; return to shader part epilog
564;
565; GFX10-LABEL: atomic_add_i32_cube:
566; GFX10:       ; %bb.0: ; %main_body
567; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
568; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
569; GFX10-NEXT:    s_mov_b32 s0, s2
570; GFX10-NEXT:    s_mov_b32 s2, s4
571; GFX10-NEXT:    s_mov_b32 s4, s6
572; GFX10-NEXT:    s_mov_b32 s6, s8
573; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
574; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
575; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
576; GFX10-NEXT:    s_mov_b32 s1, s3
577; GFX10-NEXT:    s_mov_b32 s3, s5
578; GFX10-NEXT:    s_mov_b32 s5, s7
579; GFX10-NEXT:    s_mov_b32 s7, s9
580; GFX10-NEXT:    ; implicit-def: $vcc_hi
581; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc a16
582; GFX10-NEXT:    s_waitcnt vmcnt(0)
583; GFX10-NEXT:    ; return to shader part epilog
584main_body:
585  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
586  %out = bitcast i32 %v to float
587  ret float %out
588}
589
; Image atomic add on a 1D array image addressed by two 16-bit values
; (%s, %slice). The expected code packs both into the single address register
; v1: v2 is shifted left by 16 and combined by v_and_or_b32 with v1 masked by
; 0xffff. GFX9 expects the trailing `da` modifier; GFX10 expects
; dim:SQ_RSRC_IMG_1D_ARRAY.
; The i32 value returned by the intrinsic is bitcast to float and returned.
590define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
591; GFX9-LABEL: atomic_add_i32_1darray:
592; GFX9:       ; %bb.0: ; %main_body
593; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
594; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
595; GFX9-NEXT:    s_mov_b32 s0, s2
596; GFX9-NEXT:    s_mov_b32 s1, s3
597; GFX9-NEXT:    s_mov_b32 s2, s4
598; GFX9-NEXT:    s_mov_b32 s3, s5
599; GFX9-NEXT:    s_mov_b32 s4, s6
600; GFX9-NEXT:    s_mov_b32 s5, s7
601; GFX9-NEXT:    s_mov_b32 s6, s8
602; GFX9-NEXT:    s_mov_b32 s7, s9
603; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
604; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da
605; GFX9-NEXT:    s_waitcnt vmcnt(0)
606; GFX9-NEXT:    ; return to shader part epilog
607;
608; GFX10-LABEL: atomic_add_i32_1darray:
609; GFX10:       ; %bb.0: ; %main_body
610; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
611; GFX10-NEXT:    s_mov_b32 s0, s2
612; GFX10-NEXT:    s_mov_b32 s1, s3
613; GFX10-NEXT:    s_mov_b32 s2, s4
614; GFX10-NEXT:    s_mov_b32 s3, s5
615; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
616; GFX10-NEXT:    s_mov_b32 s4, s6
617; GFX10-NEXT:    s_mov_b32 s5, s7
618; GFX10-NEXT:    s_mov_b32 s6, s8
619; GFX10-NEXT:    s_mov_b32 s7, s9
620; GFX10-NEXT:    ; implicit-def: $vcc_hi
621; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
622; GFX10-NEXT:    s_waitcnt vmcnt(0)
623; GFX10-NEXT:    ; return to shader part epilog
624main_body:
625  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
626  %out = bitcast i32 %v to float
627  ret float %out
628}
629
; Image atomic add on a 2D array image addressed by three 16-bit values
; (%s, %t, %slice). The expected code packs them into the address pair v[1:2]:
; v1 combines v1 (masked with 0xffff) and v2 (shifted left by 16); v2 combines
; v3 (masked with 0xffff) with s8, which is s0 shifted left by 16.
; NOTE(review): the high half of the second address dword has no matching
; fourth coordinate, so the s0-derived value is presumably undefined padding
; - confirm before relying on it.
; GFX9 expects the trailing `da` modifier; GFX10 expects
; dim:SQ_RSRC_IMG_2D_ARRAY.
; The i32 value returned by the intrinsic is bitcast to float and returned.
630define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
631; GFX9-LABEL: atomic_add_i32_2darray:
632; GFX9:       ; %bb.0: ; %main_body
633; GFX9-NEXT:    s_mov_b32 s0, s2
634; GFX9-NEXT:    s_mov_b32 s2, s4
635; GFX9-NEXT:    s_mov_b32 s4, s6
636; GFX9-NEXT:    s_mov_b32 s6, s8
637; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
638; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
639; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
640; GFX9-NEXT:    s_mov_b32 s1, s3
641; GFX9-NEXT:    s_mov_b32 s3, s5
642; GFX9-NEXT:    s_mov_b32 s5, s7
643; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
644; GFX9-NEXT:    s_mov_b32 s7, s9
645; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
646; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
647; GFX9-NEXT:    s_waitcnt vmcnt(0)
648; GFX9-NEXT:    ; return to shader part epilog
649;
650; GFX10-LABEL: atomic_add_i32_2darray:
651; GFX10:       ; %bb.0: ; %main_body
652; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
653; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
654; GFX10-NEXT:    s_mov_b32 s0, s2
655; GFX10-NEXT:    s_mov_b32 s2, s4
656; GFX10-NEXT:    s_mov_b32 s4, s6
657; GFX10-NEXT:    s_mov_b32 s6, s8
658; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
659; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
660; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
661; GFX10-NEXT:    s_mov_b32 s1, s3
662; GFX10-NEXT:    s_mov_b32 s3, s5
663; GFX10-NEXT:    s_mov_b32 s5, s7
664; GFX10-NEXT:    s_mov_b32 s7, s9
665; GFX10-NEXT:    ; implicit-def: $vcc_hi
666; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
667; GFX10-NEXT:    s_waitcnt vmcnt(0)
668; GFX10-NEXT:    ; return to shader part epilog
669main_body:
670  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
671  %out = bitcast i32 %v to float
672  ret float %out
673}
674
; Image atomic add on a 2D MSAA image addressed by three 16-bit values
; (%s, %t, %fragid). The expected code packs them into the address pair
; v[1:2]: v1 combines v1 (masked with 0xffff) and v2 (shifted left by 16);
; v2 combines v3 (masked with 0xffff) with s8, which is s0 shifted left by 16.
; NOTE(review): the high half of the second address dword has no matching
; fourth coordinate, so the s0-derived value is presumably undefined padding
; - confirm before relying on it.
; GFX9 expects no `da` modifier here; GFX10 expects dim:SQ_RSRC_IMG_2D_MSAA.
; The i32 value returned by the intrinsic is bitcast to float and returned.
675define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
676; GFX9-LABEL: atomic_add_i32_2dmsaa:
677; GFX9:       ; %bb.0: ; %main_body
678; GFX9-NEXT:    s_mov_b32 s0, s2
679; GFX9-NEXT:    s_mov_b32 s2, s4
680; GFX9-NEXT:    s_mov_b32 s4, s6
681; GFX9-NEXT:    s_mov_b32 s6, s8
682; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
683; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
684; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
685; GFX9-NEXT:    s_mov_b32 s1, s3
686; GFX9-NEXT:    s_mov_b32 s3, s5
687; GFX9-NEXT:    s_mov_b32 s5, s7
688; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
689; GFX9-NEXT:    s_mov_b32 s7, s9
690; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
691; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
692; GFX9-NEXT:    s_waitcnt vmcnt(0)
693; GFX9-NEXT:    ; return to shader part epilog
694;
695; GFX10-LABEL: atomic_add_i32_2dmsaa:
696; GFX10:       ; %bb.0: ; %main_body
697; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
698; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
699; GFX10-NEXT:    s_mov_b32 s0, s2
700; GFX10-NEXT:    s_mov_b32 s2, s4
701; GFX10-NEXT:    s_mov_b32 s4, s6
702; GFX10-NEXT:    s_mov_b32 s6, s8
703; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
704; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
705; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
706; GFX10-NEXT:    s_mov_b32 s1, s3
707; GFX10-NEXT:    s_mov_b32 s3, s5
708; GFX10-NEXT:    s_mov_b32 s5, s7
709; GFX10-NEXT:    s_mov_b32 s7, s9
710; GFX10-NEXT:    ; implicit-def: $vcc_hi
711; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
712; GFX10-NEXT:    s_waitcnt vmcnt(0)
713; GFX10-NEXT:    ; return to shader part epilog
714main_body:
715  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
716  %out = bitcast i32 %v to float
717  ret float %out
718}
719
; Image atomic add on a 2D MSAA array image addressed by four 16-bit values
; (%s, %t, %slice, %fragid). The expected code packs them pairwise into the
; address pair v[1:2]: v1 combines v1 (masked with 0xffff) and v2 (shifted
; left by 16); v2 combines v3 (masked with 0xffff) and v4 (shifted left by 16).
; GFX9 expects the trailing `da` modifier; GFX10 expects
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY.
; The i32 value returned by the intrinsic is bitcast to float and returned.
720define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
721; GFX9-LABEL: atomic_add_i32_2darraymsaa:
722; GFX9:       ; %bb.0: ; %main_body
723; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
724; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
725; GFX9-NEXT:    v_and_or_b32 v1, v1, v5, v2
726; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
727; GFX9-NEXT:    s_mov_b32 s0, s2
728; GFX9-NEXT:    s_mov_b32 s1, s3
729; GFX9-NEXT:    s_mov_b32 s2, s4
730; GFX9-NEXT:    s_mov_b32 s3, s5
731; GFX9-NEXT:    s_mov_b32 s4, s6
732; GFX9-NEXT:    s_mov_b32 s5, s7
733; GFX9-NEXT:    s_mov_b32 s6, s8
734; GFX9-NEXT:    s_mov_b32 s7, s9
735; GFX9-NEXT:    v_and_or_b32 v2, v3, v5, v2
736; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
737; GFX9-NEXT:    s_waitcnt vmcnt(0)
738; GFX9-NEXT:    ; return to shader part epilog
739;
740; GFX10-LABEL: atomic_add_i32_2darraymsaa:
741; GFX10:       ; %bb.0: ; %main_body
742; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
743; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
744; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
745; GFX10-NEXT:    s_mov_b32 s0, s2
746; GFX10-NEXT:    s_mov_b32 s1, s3
747; GFX10-NEXT:    s_mov_b32 s2, s4
748; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, v2
749; GFX10-NEXT:    v_and_or_b32 v2, v3, v5, v4
750; GFX10-NEXT:    s_mov_b32 s3, s5
751; GFX10-NEXT:    s_mov_b32 s4, s6
752; GFX10-NEXT:    s_mov_b32 s5, s7
753; GFX10-NEXT:    s_mov_b32 s6, s8
754; GFX10-NEXT:    s_mov_b32 s7, s9
755; GFX10-NEXT:    ; implicit-def: $vcc_hi
756; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
757; GFX10-NEXT:    s_waitcnt vmcnt(0)
758; GFX10-NEXT:    ; return to shader part epilog
759main_body:
760  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
761  %out = bitcast i32 %v to float
762  ret float %out
763}
764
; Variant of the 1D image atomic add test in which the final i32 operand of
; the intrinsic call is 2 instead of 0. The expected instruction on both
; prefixes gains the `slc` modifier next to `glc`; everything else matches the
; plain 1D add expectations.
; NOTE(review): the final operand is presumably a cache-policy bitfield whose
; bit 1 selects slc - confirm against the intrinsic documentation.
; The i32 value returned by the intrinsic is bitcast to float and returned.
765define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
766; GFX9-LABEL: atomic_add_i32_1d_slc:
767; GFX9:       ; %bb.0: ; %main_body
768; GFX9-NEXT:    s_mov_b32 s0, s2
769; GFX9-NEXT:    s_mov_b32 s1, s3
770; GFX9-NEXT:    s_mov_b32 s2, s4
771; GFX9-NEXT:    s_mov_b32 s3, s5
772; GFX9-NEXT:    s_mov_b32 s4, s6
773; GFX9-NEXT:    s_mov_b32 s5, s7
774; GFX9-NEXT:    s_mov_b32 s6, s8
775; GFX9-NEXT:    s_mov_b32 s7, s9
776; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc a16
777; GFX9-NEXT:    s_waitcnt vmcnt(0)
778; GFX9-NEXT:    ; return to shader part epilog
779;
780; GFX10-LABEL: atomic_add_i32_1d_slc:
781; GFX10:       ; %bb.0: ; %main_body
782; GFX10-NEXT:    s_mov_b32 s0, s2
783; GFX10-NEXT:    s_mov_b32 s1, s3
784; GFX10-NEXT:    s_mov_b32 s2, s4
785; GFX10-NEXT:    s_mov_b32 s3, s5
786; GFX10-NEXT:    s_mov_b32 s4, s6
787; GFX10-NEXT:    s_mov_b32 s5, s7
788; GFX10-NEXT:    s_mov_b32 s6, s8
789; GFX10-NEXT:    s_mov_b32 s7, s9
790; GFX10-NEXT:    ; implicit-def: $vcc_hi
791; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc a16
792; GFX10-NEXT:    s_waitcnt vmcnt(0)
793; GFX10-NEXT:    ; return to shader part epilog
794main_body:
795  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
796  %out = bitcast i32 %v to float
797  ret float %out
798}
799
; 1D image atomic swap on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to copy
; the resource descriptor from s[2:9] down to s[0:7] and then emit
; image_atomic_swap with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier. The gfx1010 output additionally carries
; dim:SQ_RSRC_IMG_1D.
800define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
801; GFX9-LABEL: atomic_swap_i64_1d:
802; GFX9:       ; %bb.0: ; %main_body
803; GFX9-NEXT:    s_mov_b32 s0, s2
804; GFX9-NEXT:    s_mov_b32 s1, s3
805; GFX9-NEXT:    s_mov_b32 s2, s4
806; GFX9-NEXT:    s_mov_b32 s3, s5
807; GFX9-NEXT:    s_mov_b32 s4, s6
808; GFX9-NEXT:    s_mov_b32 s5, s7
809; GFX9-NEXT:    s_mov_b32 s6, s8
810; GFX9-NEXT:    s_mov_b32 s7, s9
811; GFX9-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
812; GFX9-NEXT:    s_waitcnt vmcnt(0)
813; GFX9-NEXT:    ; return to shader part epilog
814;
815; GFX10-LABEL: atomic_swap_i64_1d:
816; GFX10:       ; %bb.0: ; %main_body
817; GFX10-NEXT:    s_mov_b32 s0, s2
818; GFX10-NEXT:    s_mov_b32 s1, s3
819; GFX10-NEXT:    s_mov_b32 s2, s4
820; GFX10-NEXT:    s_mov_b32 s3, s5
821; GFX10-NEXT:    s_mov_b32 s4, s6
822; GFX10-NEXT:    s_mov_b32 s5, s7
823; GFX10-NEXT:    s_mov_b32 s6, s8
824; GFX10-NEXT:    s_mov_b32 s7, s9
825; GFX10-NEXT:    ; implicit-def: $vcc_hi
826; GFX10-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
827; GFX10-NEXT:    s_waitcnt vmcnt(0)
828; GFX10-NEXT:    ; return to shader part epilog
829main_body:
830  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
831  %out = bitcast i64 %v to <2 x float>
832  ret <2 x float> %out
833}
834
; 1D image atomic add on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_add with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
835define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
836; GFX9-LABEL: atomic_add_i64_1d:
837; GFX9:       ; %bb.0: ; %main_body
838; GFX9-NEXT:    s_mov_b32 s0, s2
839; GFX9-NEXT:    s_mov_b32 s1, s3
840; GFX9-NEXT:    s_mov_b32 s2, s4
841; GFX9-NEXT:    s_mov_b32 s3, s5
842; GFX9-NEXT:    s_mov_b32 s4, s6
843; GFX9-NEXT:    s_mov_b32 s5, s7
844; GFX9-NEXT:    s_mov_b32 s6, s8
845; GFX9-NEXT:    s_mov_b32 s7, s9
846; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
847; GFX9-NEXT:    s_waitcnt vmcnt(0)
848; GFX9-NEXT:    ; return to shader part epilog
849;
850; GFX10-LABEL: atomic_add_i64_1d:
851; GFX10:       ; %bb.0: ; %main_body
852; GFX10-NEXT:    s_mov_b32 s0, s2
853; GFX10-NEXT:    s_mov_b32 s1, s3
854; GFX10-NEXT:    s_mov_b32 s2, s4
855; GFX10-NEXT:    s_mov_b32 s3, s5
856; GFX10-NEXT:    s_mov_b32 s4, s6
857; GFX10-NEXT:    s_mov_b32 s5, s7
858; GFX10-NEXT:    s_mov_b32 s6, s8
859; GFX10-NEXT:    s_mov_b32 s7, s9
860; GFX10-NEXT:    ; implicit-def: $vcc_hi
861; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
862; GFX10-NEXT:    s_waitcnt vmcnt(0)
863; GFX10-NEXT:    ; return to shader part epilog
864main_body:
865  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
866  %out = bitcast i64 %v to <2 x float>
867  ret <2 x float> %out
868}
869
; 1D image atomic sub on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_sub with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
870define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
871; GFX9-LABEL: atomic_sub_i64_1d:
872; GFX9:       ; %bb.0: ; %main_body
873; GFX9-NEXT:    s_mov_b32 s0, s2
874; GFX9-NEXT:    s_mov_b32 s1, s3
875; GFX9-NEXT:    s_mov_b32 s2, s4
876; GFX9-NEXT:    s_mov_b32 s3, s5
877; GFX9-NEXT:    s_mov_b32 s4, s6
878; GFX9-NEXT:    s_mov_b32 s5, s7
879; GFX9-NEXT:    s_mov_b32 s6, s8
880; GFX9-NEXT:    s_mov_b32 s7, s9
881; GFX9-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
882; GFX9-NEXT:    s_waitcnt vmcnt(0)
883; GFX9-NEXT:    ; return to shader part epilog
884;
885; GFX10-LABEL: atomic_sub_i64_1d:
886; GFX10:       ; %bb.0: ; %main_body
887; GFX10-NEXT:    s_mov_b32 s0, s2
888; GFX10-NEXT:    s_mov_b32 s1, s3
889; GFX10-NEXT:    s_mov_b32 s2, s4
890; GFX10-NEXT:    s_mov_b32 s3, s5
891; GFX10-NEXT:    s_mov_b32 s4, s6
892; GFX10-NEXT:    s_mov_b32 s5, s7
893; GFX10-NEXT:    s_mov_b32 s6, s8
894; GFX10-NEXT:    s_mov_b32 s7, s9
895; GFX10-NEXT:    ; implicit-def: $vcc_hi
896; GFX10-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
897; GFX10-NEXT:    s_waitcnt vmcnt(0)
898; GFX10-NEXT:    ; return to shader part epilog
899main_body:
900  %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
901  %out = bitcast i64 %v to <2 x float>
902  ret <2 x float> %out
903}
904
; 1D image atomic smin on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_smin with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
905define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
906; GFX9-LABEL: atomic_smin_i64_1d:
907; GFX9:       ; %bb.0: ; %main_body
908; GFX9-NEXT:    s_mov_b32 s0, s2
909; GFX9-NEXT:    s_mov_b32 s1, s3
910; GFX9-NEXT:    s_mov_b32 s2, s4
911; GFX9-NEXT:    s_mov_b32 s3, s5
912; GFX9-NEXT:    s_mov_b32 s4, s6
913; GFX9-NEXT:    s_mov_b32 s5, s7
914; GFX9-NEXT:    s_mov_b32 s6, s8
915; GFX9-NEXT:    s_mov_b32 s7, s9
916; GFX9-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
917; GFX9-NEXT:    s_waitcnt vmcnt(0)
918; GFX9-NEXT:    ; return to shader part epilog
919;
920; GFX10-LABEL: atomic_smin_i64_1d:
921; GFX10:       ; %bb.0: ; %main_body
922; GFX10-NEXT:    s_mov_b32 s0, s2
923; GFX10-NEXT:    s_mov_b32 s1, s3
924; GFX10-NEXT:    s_mov_b32 s2, s4
925; GFX10-NEXT:    s_mov_b32 s3, s5
926; GFX10-NEXT:    s_mov_b32 s4, s6
927; GFX10-NEXT:    s_mov_b32 s5, s7
928; GFX10-NEXT:    s_mov_b32 s6, s8
929; GFX10-NEXT:    s_mov_b32 s7, s9
930; GFX10-NEXT:    ; implicit-def: $vcc_hi
931; GFX10-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
932; GFX10-NEXT:    s_waitcnt vmcnt(0)
933; GFX10-NEXT:    ; return to shader part epilog
934main_body:
935  %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
936  %out = bitcast i64 %v to <2 x float>
937  ret <2 x float> %out
938}
939
; 1D image atomic umin on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_umin with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
940define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
941; GFX9-LABEL: atomic_umin_i64_1d:
942; GFX9:       ; %bb.0: ; %main_body
943; GFX9-NEXT:    s_mov_b32 s0, s2
944; GFX9-NEXT:    s_mov_b32 s1, s3
945; GFX9-NEXT:    s_mov_b32 s2, s4
946; GFX9-NEXT:    s_mov_b32 s3, s5
947; GFX9-NEXT:    s_mov_b32 s4, s6
948; GFX9-NEXT:    s_mov_b32 s5, s7
949; GFX9-NEXT:    s_mov_b32 s6, s8
950; GFX9-NEXT:    s_mov_b32 s7, s9
951; GFX9-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
952; GFX9-NEXT:    s_waitcnt vmcnt(0)
953; GFX9-NEXT:    ; return to shader part epilog
954;
955; GFX10-LABEL: atomic_umin_i64_1d:
956; GFX10:       ; %bb.0: ; %main_body
957; GFX10-NEXT:    s_mov_b32 s0, s2
958; GFX10-NEXT:    s_mov_b32 s1, s3
959; GFX10-NEXT:    s_mov_b32 s2, s4
960; GFX10-NEXT:    s_mov_b32 s3, s5
961; GFX10-NEXT:    s_mov_b32 s4, s6
962; GFX10-NEXT:    s_mov_b32 s5, s7
963; GFX10-NEXT:    s_mov_b32 s6, s8
964; GFX10-NEXT:    s_mov_b32 s7, s9
965; GFX10-NEXT:    ; implicit-def: $vcc_hi
966; GFX10-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
967; GFX10-NEXT:    s_waitcnt vmcnt(0)
968; GFX10-NEXT:    ; return to shader part epilog
969main_body:
970  %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
971  %out = bitcast i64 %v to <2 x float>
972  ret <2 x float> %out
973}
974
; 1D image atomic smax on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_smax with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
975define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
976; GFX9-LABEL: atomic_smax_i64_1d:
977; GFX9:       ; %bb.0: ; %main_body
978; GFX9-NEXT:    s_mov_b32 s0, s2
979; GFX9-NEXT:    s_mov_b32 s1, s3
980; GFX9-NEXT:    s_mov_b32 s2, s4
981; GFX9-NEXT:    s_mov_b32 s3, s5
982; GFX9-NEXT:    s_mov_b32 s4, s6
983; GFX9-NEXT:    s_mov_b32 s5, s7
984; GFX9-NEXT:    s_mov_b32 s6, s8
985; GFX9-NEXT:    s_mov_b32 s7, s9
986; GFX9-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
987; GFX9-NEXT:    s_waitcnt vmcnt(0)
988; GFX9-NEXT:    ; return to shader part epilog
989;
990; GFX10-LABEL: atomic_smax_i64_1d:
991; GFX10:       ; %bb.0: ; %main_body
992; GFX10-NEXT:    s_mov_b32 s0, s2
993; GFX10-NEXT:    s_mov_b32 s1, s3
994; GFX10-NEXT:    s_mov_b32 s2, s4
995; GFX10-NEXT:    s_mov_b32 s3, s5
996; GFX10-NEXT:    s_mov_b32 s4, s6
997; GFX10-NEXT:    s_mov_b32 s5, s7
998; GFX10-NEXT:    s_mov_b32 s6, s8
999; GFX10-NEXT:    s_mov_b32 s7, s9
1000; GFX10-NEXT:    ; implicit-def: $vcc_hi
1001; GFX10-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1002; GFX10-NEXT:    s_waitcnt vmcnt(0)
1003; GFX10-NEXT:    ; return to shader part epilog
1004main_body:
1005  %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1006  %out = bitcast i64 %v to <2 x float>
1007  ret <2 x float> %out
1008}
1009
; 1D image atomic umax on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_umax with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
1010define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1011; GFX9-LABEL: atomic_umax_i64_1d:
1012; GFX9:       ; %bb.0: ; %main_body
1013; GFX9-NEXT:    s_mov_b32 s0, s2
1014; GFX9-NEXT:    s_mov_b32 s1, s3
1015; GFX9-NEXT:    s_mov_b32 s2, s4
1016; GFX9-NEXT:    s_mov_b32 s3, s5
1017; GFX9-NEXT:    s_mov_b32 s4, s6
1018; GFX9-NEXT:    s_mov_b32 s5, s7
1019; GFX9-NEXT:    s_mov_b32 s6, s8
1020; GFX9-NEXT:    s_mov_b32 s7, s9
1021; GFX9-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1022; GFX9-NEXT:    s_waitcnt vmcnt(0)
1023; GFX9-NEXT:    ; return to shader part epilog
1024;
1025; GFX10-LABEL: atomic_umax_i64_1d:
1026; GFX10:       ; %bb.0: ; %main_body
1027; GFX10-NEXT:    s_mov_b32 s0, s2
1028; GFX10-NEXT:    s_mov_b32 s1, s3
1029; GFX10-NEXT:    s_mov_b32 s2, s4
1030; GFX10-NEXT:    s_mov_b32 s3, s5
1031; GFX10-NEXT:    s_mov_b32 s4, s6
1032; GFX10-NEXT:    s_mov_b32 s5, s7
1033; GFX10-NEXT:    s_mov_b32 s6, s8
1034; GFX10-NEXT:    s_mov_b32 s7, s9
1035; GFX10-NEXT:    ; implicit-def: $vcc_hi
1036; GFX10-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1037; GFX10-NEXT:    s_waitcnt vmcnt(0)
1038; GFX10-NEXT:    ; return to shader part epilog
1039main_body:
1040  %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1041  %out = bitcast i64 %v to <2 x float>
1042  ret <2 x float> %out
1043}
1044
; 1D image atomic and on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_and with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
1045define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1046; GFX9-LABEL: atomic_and_i64_1d:
1047; GFX9:       ; %bb.0: ; %main_body
1048; GFX9-NEXT:    s_mov_b32 s0, s2
1049; GFX9-NEXT:    s_mov_b32 s1, s3
1050; GFX9-NEXT:    s_mov_b32 s2, s4
1051; GFX9-NEXT:    s_mov_b32 s3, s5
1052; GFX9-NEXT:    s_mov_b32 s4, s6
1053; GFX9-NEXT:    s_mov_b32 s5, s7
1054; GFX9-NEXT:    s_mov_b32 s6, s8
1055; GFX9-NEXT:    s_mov_b32 s7, s9
1056; GFX9-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1057; GFX9-NEXT:    s_waitcnt vmcnt(0)
1058; GFX9-NEXT:    ; return to shader part epilog
1059;
1060; GFX10-LABEL: atomic_and_i64_1d:
1061; GFX10:       ; %bb.0: ; %main_body
1062; GFX10-NEXT:    s_mov_b32 s0, s2
1063; GFX10-NEXT:    s_mov_b32 s1, s3
1064; GFX10-NEXT:    s_mov_b32 s2, s4
1065; GFX10-NEXT:    s_mov_b32 s3, s5
1066; GFX10-NEXT:    s_mov_b32 s4, s6
1067; GFX10-NEXT:    s_mov_b32 s5, s7
1068; GFX10-NEXT:    s_mov_b32 s6, s8
1069; GFX10-NEXT:    s_mov_b32 s7, s9
1070; GFX10-NEXT:    ; implicit-def: $vcc_hi
1071; GFX10-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1072; GFX10-NEXT:    s_waitcnt vmcnt(0)
1073; GFX10-NEXT:    ; return to shader part epilog
1074main_body:
1075  %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1076  %out = bitcast i64 %v to <2 x float>
1077  ret <2 x float> %out
1078}
1079
; 1D image atomic or on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_or with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
1080define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1081; GFX9-LABEL: atomic_or_i64_1d:
1082; GFX9:       ; %bb.0: ; %main_body
1083; GFX9-NEXT:    s_mov_b32 s0, s2
1084; GFX9-NEXT:    s_mov_b32 s1, s3
1085; GFX9-NEXT:    s_mov_b32 s2, s4
1086; GFX9-NEXT:    s_mov_b32 s3, s5
1087; GFX9-NEXT:    s_mov_b32 s4, s6
1088; GFX9-NEXT:    s_mov_b32 s5, s7
1089; GFX9-NEXT:    s_mov_b32 s6, s8
1090; GFX9-NEXT:    s_mov_b32 s7, s9
1091; GFX9-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1092; GFX9-NEXT:    s_waitcnt vmcnt(0)
1093; GFX9-NEXT:    ; return to shader part epilog
1094;
1095; GFX10-LABEL: atomic_or_i64_1d:
1096; GFX10:       ; %bb.0: ; %main_body
1097; GFX10-NEXT:    s_mov_b32 s0, s2
1098; GFX10-NEXT:    s_mov_b32 s1, s3
1099; GFX10-NEXT:    s_mov_b32 s2, s4
1100; GFX10-NEXT:    s_mov_b32 s3, s5
1101; GFX10-NEXT:    s_mov_b32 s4, s6
1102; GFX10-NEXT:    s_mov_b32 s5, s7
1103; GFX10-NEXT:    s_mov_b32 s6, s8
1104; GFX10-NEXT:    s_mov_b32 s7, s9
1105; GFX10-NEXT:    ; implicit-def: $vcc_hi
1106; GFX10-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1107; GFX10-NEXT:    s_waitcnt vmcnt(0)
1108; GFX10-NEXT:    ; return to shader part epilog
1109main_body:
1110  %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1111  %out = bitcast i64 %v to <2 x float>
1112  ret <2 x float> %out
1113}
1114
; 1D image atomic xor on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_xor with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
1115define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1116; GFX9-LABEL: atomic_xor_i64_1d:
1117; GFX9:       ; %bb.0: ; %main_body
1118; GFX9-NEXT:    s_mov_b32 s0, s2
1119; GFX9-NEXT:    s_mov_b32 s1, s3
1120; GFX9-NEXT:    s_mov_b32 s2, s4
1121; GFX9-NEXT:    s_mov_b32 s3, s5
1122; GFX9-NEXT:    s_mov_b32 s4, s6
1123; GFX9-NEXT:    s_mov_b32 s5, s7
1124; GFX9-NEXT:    s_mov_b32 s6, s8
1125; GFX9-NEXT:    s_mov_b32 s7, s9
1126; GFX9-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1127; GFX9-NEXT:    s_waitcnt vmcnt(0)
1128; GFX9-NEXT:    ; return to shader part epilog
1129;
1130; GFX10-LABEL: atomic_xor_i64_1d:
1131; GFX10:       ; %bb.0: ; %main_body
1132; GFX10-NEXT:    s_mov_b32 s0, s2
1133; GFX10-NEXT:    s_mov_b32 s1, s3
1134; GFX10-NEXT:    s_mov_b32 s2, s4
1135; GFX10-NEXT:    s_mov_b32 s3, s5
1136; GFX10-NEXT:    s_mov_b32 s4, s6
1137; GFX10-NEXT:    s_mov_b32 s5, s7
1138; GFX10-NEXT:    s_mov_b32 s6, s8
1139; GFX10-NEXT:    s_mov_b32 s7, s9
1140; GFX10-NEXT:    ; implicit-def: $vcc_hi
1141; GFX10-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1142; GFX10-NEXT:    s_waitcnt vmcnt(0)
1143; GFX10-NEXT:    ; return to shader part epilog
1144main_body:
1145  %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1146  %out = bitcast i64 %v to <2 x float>
1147  ret <2 x float> %out
1148}
1149
; 1D image atomic inc on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_inc with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
1150define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1151; GFX9-LABEL: atomic_inc_i64_1d:
1152; GFX9:       ; %bb.0: ; %main_body
1153; GFX9-NEXT:    s_mov_b32 s0, s2
1154; GFX9-NEXT:    s_mov_b32 s1, s3
1155; GFX9-NEXT:    s_mov_b32 s2, s4
1156; GFX9-NEXT:    s_mov_b32 s3, s5
1157; GFX9-NEXT:    s_mov_b32 s4, s6
1158; GFX9-NEXT:    s_mov_b32 s5, s7
1159; GFX9-NEXT:    s_mov_b32 s6, s8
1160; GFX9-NEXT:    s_mov_b32 s7, s9
1161; GFX9-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1162; GFX9-NEXT:    s_waitcnt vmcnt(0)
1163; GFX9-NEXT:    ; return to shader part epilog
1164;
1165; GFX10-LABEL: atomic_inc_i64_1d:
1166; GFX10:       ; %bb.0: ; %main_body
1167; GFX10-NEXT:    s_mov_b32 s0, s2
1168; GFX10-NEXT:    s_mov_b32 s1, s3
1169; GFX10-NEXT:    s_mov_b32 s2, s4
1170; GFX10-NEXT:    s_mov_b32 s3, s5
1171; GFX10-NEXT:    s_mov_b32 s4, s6
1172; GFX10-NEXT:    s_mov_b32 s5, s7
1173; GFX10-NEXT:    s_mov_b32 s6, s8
1174; GFX10-NEXT:    s_mov_b32 s7, s9
1175; GFX10-NEXT:    ; implicit-def: $vcc_hi
1176; GFX10-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1177; GFX10-NEXT:    s_waitcnt vmcnt(0)
1178; GFX10-NEXT:    ; return to shader part epilog
1179main_body:
1180  %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1181  %out = bitcast i64 %v to <2 x float>
1182  ret <2 x float> %out
1183}
1184
; 1D image atomic dec on i64 data with a single i16 coordinate; the i64 result
; is bitcast to <2 x float> for the return. Both targets are expected to emit
; image_atomic_dec with dmask:0x3, data/result in v[0:1], the coordinate in v2
; and the a16 modifier, after copying the descriptor from s[2:9] to s[0:7].
; The gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
1185define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
1186; GFX9-LABEL: atomic_dec_i64_1d:
1187; GFX9:       ; %bb.0: ; %main_body
1188; GFX9-NEXT:    s_mov_b32 s0, s2
1189; GFX9-NEXT:    s_mov_b32 s1, s3
1190; GFX9-NEXT:    s_mov_b32 s2, s4
1191; GFX9-NEXT:    s_mov_b32 s3, s5
1192; GFX9-NEXT:    s_mov_b32 s4, s6
1193; GFX9-NEXT:    s_mov_b32 s5, s7
1194; GFX9-NEXT:    s_mov_b32 s6, s8
1195; GFX9-NEXT:    s_mov_b32 s7, s9
1196; GFX9-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1197; GFX9-NEXT:    s_waitcnt vmcnt(0)
1198; GFX9-NEXT:    ; return to shader part epilog
1199;
1200; GFX10-LABEL: atomic_dec_i64_1d:
1201; GFX10:       ; %bb.0: ; %main_body
1202; GFX10-NEXT:    s_mov_b32 s0, s2
1203; GFX10-NEXT:    s_mov_b32 s1, s3
1204; GFX10-NEXT:    s_mov_b32 s2, s4
1205; GFX10-NEXT:    s_mov_b32 s3, s5
1206; GFX10-NEXT:    s_mov_b32 s4, s6
1207; GFX10-NEXT:    s_mov_b32 s5, s7
1208; GFX10-NEXT:    s_mov_b32 s6, s8
1209; GFX10-NEXT:    s_mov_b32 s7, s9
1210; GFX10-NEXT:    ; implicit-def: $vcc_hi
1211; GFX10-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
1212; GFX10-NEXT:    s_waitcnt vmcnt(0)
1213; GFX10-NEXT:    ; return to shader part epilog
1214main_body:
1215  %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1216  %out = bitcast i64 %v to <2 x float>
1217  ret <2 x float> %out
1218}
1219
; 1D image atomic compare-and-swap with two i64 data operands (%cmp, %swap)
; and a single i16 coordinate; the i64 result is bitcast to <2 x float> for
; the return. Because there are two 64-bit data operands, both targets are
; expected to emit image_atomic_cmpswap with a four-register data tuple
; v[0:3] and dmask:0xf, the coordinate in v4, and the a16 modifier. The
; gfx1010 output additionally carries dim:SQ_RSRC_IMG_1D.
1220define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i16 %s) {
1221; GFX9-LABEL: atomic_cmpswap_i64_1d:
1222; GFX9:       ; %bb.0: ; %main_body
1223; GFX9-NEXT:    s_mov_b32 s0, s2
1224; GFX9-NEXT:    s_mov_b32 s1, s3
1225; GFX9-NEXT:    s_mov_b32 s2, s4
1226; GFX9-NEXT:    s_mov_b32 s3, s5
1227; GFX9-NEXT:    s_mov_b32 s4, s6
1228; GFX9-NEXT:    s_mov_b32 s5, s7
1229; GFX9-NEXT:    s_mov_b32 s6, s8
1230; GFX9-NEXT:    s_mov_b32 s7, s9
1231; GFX9-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
1232; GFX9-NEXT:    s_waitcnt vmcnt(0)
1233; GFX9-NEXT:    ; return to shader part epilog
1234;
1235; GFX10-LABEL: atomic_cmpswap_i64_1d:
1236; GFX10:       ; %bb.0: ; %main_body
1237; GFX10-NEXT:    s_mov_b32 s0, s2
1238; GFX10-NEXT:    s_mov_b32 s1, s3
1239; GFX10-NEXT:    s_mov_b32 s2, s4
1240; GFX10-NEXT:    s_mov_b32 s3, s5
1241; GFX10-NEXT:    s_mov_b32 s4, s6
1242; GFX10-NEXT:    s_mov_b32 s5, s7
1243; GFX10-NEXT:    s_mov_b32 s6, s8
1244; GFX10-NEXT:    s_mov_b32 s7, s9
1245; GFX10-NEXT:    ; implicit-def: $vcc_hi
1246; GFX10-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
1247; GFX10-NEXT:    s_waitcnt vmcnt(0)
1248; GFX10-NEXT:    ; return to shader part epilog
1249main_body:
1250  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64 %cmp, i64 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
1251  %out = bitcast i64 %v to <2 x float>
1252  ret <2 x float> %out
1253}
1254
; 2D image atomic add on i64 data with two i16 coordinates (%s, %t). The
; checks expect the two 16-bit coordinates to be packed into the single
; address register v2 as (v2 & 0xffff) | (v3 << 16) before the
; image_atomic_add (dmask:0x3, a16). The gfx900 output materializes the
; 0xffff mask in v4, whereas the gfx1010 output uses 0xffff directly as a
; literal operand of v_and_or_b32 and carries dim:SQ_RSRC_IMG_2D.
1255define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) {
1256; GFX9-LABEL: atomic_add_i64_2d:
1257; GFX9:       ; %bb.0: ; %main_body
1258; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
1259; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1260; GFX9-NEXT:    s_mov_b32 s0, s2
1261; GFX9-NEXT:    s_mov_b32 s1, s3
1262; GFX9-NEXT:    s_mov_b32 s2, s4
1263; GFX9-NEXT:    s_mov_b32 s3, s5
1264; GFX9-NEXT:    s_mov_b32 s4, s6
1265; GFX9-NEXT:    s_mov_b32 s5, s7
1266; GFX9-NEXT:    s_mov_b32 s6, s8
1267; GFX9-NEXT:    s_mov_b32 s7, s9
1268; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
1269; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
1270; GFX9-NEXT:    s_waitcnt vmcnt(0)
1271; GFX9-NEXT:    ; return to shader part epilog
1272;
1273; GFX10-LABEL: atomic_add_i64_2d:
1274; GFX10:       ; %bb.0: ; %main_body
1275; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1276; GFX10-NEXT:    s_mov_b32 s0, s2
1277; GFX10-NEXT:    s_mov_b32 s1, s3
1278; GFX10-NEXT:    s_mov_b32 s2, s4
1279; GFX10-NEXT:    s_mov_b32 s3, s5
1280; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
1281; GFX10-NEXT:    s_mov_b32 s4, s6
1282; GFX10-NEXT:    s_mov_b32 s5, s7
1283; GFX10-NEXT:    s_mov_b32 s6, s8
1284; GFX10-NEXT:    s_mov_b32 s7, s9
1285; GFX10-NEXT:    ; implicit-def: $vcc_hi
1286; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc a16
1287; GFX10-NEXT:    s_waitcnt vmcnt(0)
1288; GFX10-NEXT:    ; return to shader part epilog
1289main_body:
1290  %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
1291  %out = bitcast i64 %v to <2 x float>
1292  ret <2 x float> %out
1293}
1294
; 3D image atomic add on i64 data with three i16 coordinates (%s, %t, %r).
; The checks expect a two-register address v[2:3]: v2 packs a coordinate pair
; as (v2 & 0xffff) | (v3 << 16), and v3 is built as (v4 & 0xffff) | s8.
; The gfx1010 output carries dim:SQ_RSRC_IMG_3D.
; NOTE(review): s8 is produced by "s_lshl_b32 s8, s0, 16" right after s0 was
; loaded from s2, so the upper half of v3 looks like a don't-care padding
; half rather than a real coordinate -- confirm.
1295define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) {
1296; GFX9-LABEL: atomic_add_i64_3d:
1297; GFX9:       ; %bb.0: ; %main_body
1298; GFX9-NEXT:    s_mov_b32 s0, s2
1299; GFX9-NEXT:    s_mov_b32 s2, s4
1300; GFX9-NEXT:    s_mov_b32 s4, s6
1301; GFX9-NEXT:    s_mov_b32 s6, s8
1302; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1303; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1304; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1305; GFX9-NEXT:    s_mov_b32 s1, s3
1306; GFX9-NEXT:    s_mov_b32 s3, s5
1307; GFX9-NEXT:    s_mov_b32 s5, s7
1308; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1309; GFX9-NEXT:    s_mov_b32 s7, s9
1310; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1311; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
1312; GFX9-NEXT:    s_waitcnt vmcnt(0)
1313; GFX9-NEXT:    ; return to shader part epilog
1314;
1315; GFX10-LABEL: atomic_add_i64_3d:
1316; GFX10:       ; %bb.0: ; %main_body
1317; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1318; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1319; GFX10-NEXT:    s_mov_b32 s0, s2
1320; GFX10-NEXT:    s_mov_b32 s2, s4
1321; GFX10-NEXT:    s_mov_b32 s4, s6
1322; GFX10-NEXT:    s_mov_b32 s6, s8
1323; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1324; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1325; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1326; GFX10-NEXT:    s_mov_b32 s1, s3
1327; GFX10-NEXT:    s_mov_b32 s3, s5
1328; GFX10-NEXT:    s_mov_b32 s5, s7
1329; GFX10-NEXT:    s_mov_b32 s7, s9
1330; GFX10-NEXT:    ; implicit-def: $vcc_hi
1331; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc a16
1332; GFX10-NEXT:    s_waitcnt vmcnt(0)
1333; GFX10-NEXT:    ; return to shader part epilog
1334main_body:
1335  %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
1336  %out = bitcast i64 %v to <2 x float>
1337  ret <2 x float> %out
1338}
1339
; Cube image atomic add on i64 data with three i16 operands (%s, %t, %face).
; The checks expect a two-register address v[2:3]: v2 packs a coordinate pair
; as (v2 & 0xffff) | (v3 << 16), and v3 is built as (v4 & 0xffff) | s8.
; The gfx900 instruction carries the "da" modifier; the gfx1010 instruction
; carries dim:SQ_RSRC_IMG_CUBE instead.
; NOTE(review): s8 is produced by "s_lshl_b32 s8, s0, 16" right after s0 was
; loaded from s2, so the upper half of v3 looks like a don't-care padding
; half rather than a real coordinate -- confirm.
1340define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) {
1341; GFX9-LABEL: atomic_add_i64_cube:
1342; GFX9:       ; %bb.0: ; %main_body
1343; GFX9-NEXT:    s_mov_b32 s0, s2
1344; GFX9-NEXT:    s_mov_b32 s2, s4
1345; GFX9-NEXT:    s_mov_b32 s4, s6
1346; GFX9-NEXT:    s_mov_b32 s6, s8
1347; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1348; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1349; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1350; GFX9-NEXT:    s_mov_b32 s1, s3
1351; GFX9-NEXT:    s_mov_b32 s3, s5
1352; GFX9-NEXT:    s_mov_b32 s5, s7
1353; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1354; GFX9-NEXT:    s_mov_b32 s7, s9
1355; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1356; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1357; GFX9-NEXT:    s_waitcnt vmcnt(0)
1358; GFX9-NEXT:    ; return to shader part epilog
1359;
1360; GFX10-LABEL: atomic_add_i64_cube:
1361; GFX10:       ; %bb.0: ; %main_body
1362; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1363; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1364; GFX10-NEXT:    s_mov_b32 s0, s2
1365; GFX10-NEXT:    s_mov_b32 s2, s4
1366; GFX10-NEXT:    s_mov_b32 s4, s6
1367; GFX10-NEXT:    s_mov_b32 s6, s8
1368; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1369; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1370; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1371; GFX10-NEXT:    s_mov_b32 s1, s3
1372; GFX10-NEXT:    s_mov_b32 s3, s5
1373; GFX10-NEXT:    s_mov_b32 s5, s7
1374; GFX10-NEXT:    s_mov_b32 s7, s9
1375; GFX10-NEXT:    ; implicit-def: $vcc_hi
1376; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc a16
1377; GFX10-NEXT:    s_waitcnt vmcnt(0)
1378; GFX10-NEXT:    ; return to shader part epilog
1379main_body:
1380  %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64 %data, i16 %s, i16 %t, i16 %face , <8 x i32> %rsrc, i32 0, i32 0)
1381  %out = bitcast i64 %v to <2 x float>
1382  ret <2 x float> %out
1383}
1384
; 1D-array image atomic add on i64 data with two i16 operands (%s, %slice).
; The checks expect both 16-bit values packed into the single address
; register v2 as (v2 & 0xffff) | (v3 << 16). The gfx900 output materializes
; the 0xffff mask in v4 and its image_atomic_add carries the "da" modifier;
; the gfx1010 output uses 0xffff as a literal operand and carries
; dim:SQ_RSRC_IMG_1D_ARRAY instead.
1385define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) {
1386; GFX9-LABEL: atomic_add_i64_1darray:
1387; GFX9:       ; %bb.0: ; %main_body
1388; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
1389; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1390; GFX9-NEXT:    s_mov_b32 s0, s2
1391; GFX9-NEXT:    s_mov_b32 s1, s3
1392; GFX9-NEXT:    s_mov_b32 s2, s4
1393; GFX9-NEXT:    s_mov_b32 s3, s5
1394; GFX9-NEXT:    s_mov_b32 s4, s6
1395; GFX9-NEXT:    s_mov_b32 s5, s7
1396; GFX9-NEXT:    s_mov_b32 s6, s8
1397; GFX9-NEXT:    s_mov_b32 s7, s9
1398; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
1399; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da
1400; GFX9-NEXT:    s_waitcnt vmcnt(0)
1401; GFX9-NEXT:    ; return to shader part epilog
1402;
1403; GFX10-LABEL: atomic_add_i64_1darray:
1404; GFX10:       ; %bb.0: ; %main_body
1405; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1406; GFX10-NEXT:    s_mov_b32 s0, s2
1407; GFX10-NEXT:    s_mov_b32 s1, s3
1408; GFX10-NEXT:    s_mov_b32 s2, s4
1409; GFX10-NEXT:    s_mov_b32 s3, s5
1410; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
1411; GFX10-NEXT:    s_mov_b32 s4, s6
1412; GFX10-NEXT:    s_mov_b32 s5, s7
1413; GFX10-NEXT:    s_mov_b32 s6, s8
1414; GFX10-NEXT:    s_mov_b32 s7, s9
1415; GFX10-NEXT:    ; implicit-def: $vcc_hi
1416; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
1417; GFX10-NEXT:    s_waitcnt vmcnt(0)
1418; GFX10-NEXT:    ; return to shader part epilog
1419main_body:
1420  %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1421  %out = bitcast i64 %v to <2 x float>
1422  ret <2 x float> %out
1423}
1424
; 2D-array image atomic add on i64 data with three i16 operands
; (%s, %t, %slice). The checks expect a two-register address v[2:3]: v2 packs
; a coordinate pair as (v2 & 0xffff) | (v3 << 16), and v3 is built as
; (v4 & 0xffff) | s8. The gfx900 instruction carries the "da" modifier; the
; gfx1010 instruction carries dim:SQ_RSRC_IMG_2D_ARRAY instead.
; NOTE(review): s8 is produced by "s_lshl_b32 s8, s0, 16" right after s0 was
; loaded from s2, so the upper half of v3 looks like a don't-care padding
; half rather than a real coordinate -- confirm.
1425define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) {
1426; GFX9-LABEL: atomic_add_i64_2darray:
1427; GFX9:       ; %bb.0: ; %main_body
1428; GFX9-NEXT:    s_mov_b32 s0, s2
1429; GFX9-NEXT:    s_mov_b32 s2, s4
1430; GFX9-NEXT:    s_mov_b32 s4, s6
1431; GFX9-NEXT:    s_mov_b32 s6, s8
1432; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1433; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1434; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1435; GFX9-NEXT:    s_mov_b32 s1, s3
1436; GFX9-NEXT:    s_mov_b32 s3, s5
1437; GFX9-NEXT:    s_mov_b32 s5, s7
1438; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1439; GFX9-NEXT:    s_mov_b32 s7, s9
1440; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1441; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1442; GFX9-NEXT:    s_waitcnt vmcnt(0)
1443; GFX9-NEXT:    ; return to shader part epilog
1444;
1445; GFX10-LABEL: atomic_add_i64_2darray:
1446; GFX10:       ; %bb.0: ; %main_body
1447; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1448; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1449; GFX10-NEXT:    s_mov_b32 s0, s2
1450; GFX10-NEXT:    s_mov_b32 s2, s4
1451; GFX10-NEXT:    s_mov_b32 s4, s6
1452; GFX10-NEXT:    s_mov_b32 s6, s8
1453; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1454; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1455; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1456; GFX10-NEXT:    s_mov_b32 s1, s3
1457; GFX10-NEXT:    s_mov_b32 s3, s5
1458; GFX10-NEXT:    s_mov_b32 s5, s7
1459; GFX10-NEXT:    s_mov_b32 s7, s9
1460; GFX10-NEXT:    ; implicit-def: $vcc_hi
1461; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
1462; GFX10-NEXT:    s_waitcnt vmcnt(0)
1463; GFX10-NEXT:    ; return to shader part epilog
1464main_body:
1465  %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
1466  %out = bitcast i64 %v to <2 x float>
1467  ret <2 x float> %out
1468}
1469
; 2D-MSAA image atomic add on i64 data with three i16 operands
; (%s, %t, %fragid). The checks expect a two-register address v[2:3]: v2
; packs a coordinate pair as (v2 & 0xffff) | (v3 << 16), and v3 is built as
; (v4 & 0xffff) | s8. The gfx900 instruction has no "da" modifier here; the
; gfx1010 instruction carries dim:SQ_RSRC_IMG_2D_MSAA.
; NOTE(review): s8 is produced by "s_lshl_b32 s8, s0, 16" right after s0 was
; loaded from s2, so the upper half of v3 looks like a don't-care padding
; half rather than a real coordinate -- confirm.
1470define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) {
1471; GFX9-LABEL: atomic_add_i64_2dmsaa:
1472; GFX9:       ; %bb.0: ; %main_body
1473; GFX9-NEXT:    s_mov_b32 s0, s2
1474; GFX9-NEXT:    s_mov_b32 s2, s4
1475; GFX9-NEXT:    s_mov_b32 s4, s6
1476; GFX9-NEXT:    s_mov_b32 s6, s8
1477; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
1478; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1479; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
1480; GFX9-NEXT:    s_mov_b32 s1, s3
1481; GFX9-NEXT:    s_mov_b32 s3, s5
1482; GFX9-NEXT:    s_mov_b32 s5, s7
1483; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
1484; GFX9-NEXT:    s_mov_b32 s7, s9
1485; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s8
1486; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16
1487; GFX9-NEXT:    s_waitcnt vmcnt(0)
1488; GFX9-NEXT:    ; return to shader part epilog
1489;
1490; GFX10-LABEL: atomic_add_i64_2dmsaa:
1491; GFX10:       ; %bb.0: ; %main_body
1492; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
1493; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1494; GFX10-NEXT:    s_mov_b32 s0, s2
1495; GFX10-NEXT:    s_mov_b32 s2, s4
1496; GFX10-NEXT:    s_mov_b32 s4, s6
1497; GFX10-NEXT:    s_mov_b32 s6, s8
1498; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
1499; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, v3
1500; GFX10-NEXT:    v_and_or_b32 v3, v4, v5, s8
1501; GFX10-NEXT:    s_mov_b32 s1, s3
1502; GFX10-NEXT:    s_mov_b32 s3, s5
1503; GFX10-NEXT:    s_mov_b32 s5, s7
1504; GFX10-NEXT:    s_mov_b32 s7, s9
1505; GFX10-NEXT:    ; implicit-def: $vcc_hi
1506; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
1507; GFX10-NEXT:    s_waitcnt vmcnt(0)
1508; GFX10-NEXT:    ; return to shader part epilog
1509main_body:
1510  %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1511  %out = bitcast i64 %v to <2 x float>
1512  ret <2 x float> %out
1513}
1514
; 2D-array-MSAA image atomic add on i64 data with four i16 operands
; (%s, %t, %slice, %fragid). The checks expect two fully packed address
; registers: v2 = (v2 & 0xffff) | (v3 << 16) and v3 = (v4 & 0xffff) | (v5 << 16),
; with the 0xffff mask materialized in v6 on both targets. The gfx900
; instruction carries the "da" modifier; the gfx1010 instruction carries
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead.
1515define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
1516; GFX9-LABEL: atomic_add_i64_2darraymsaa:
1517; GFX9:       ; %bb.0: ; %main_body
1518; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
1519; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1520; GFX9-NEXT:    v_and_or_b32 v2, v2, v6, v3
1521; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
1522; GFX9-NEXT:    s_mov_b32 s0, s2
1523; GFX9-NEXT:    s_mov_b32 s1, s3
1524; GFX9-NEXT:    s_mov_b32 s2, s4
1525; GFX9-NEXT:    s_mov_b32 s3, s5
1526; GFX9-NEXT:    s_mov_b32 s4, s6
1527; GFX9-NEXT:    s_mov_b32 s5, s7
1528; GFX9-NEXT:    s_mov_b32 s6, s8
1529; GFX9-NEXT:    s_mov_b32 s7, s9
1530; GFX9-NEXT:    v_and_or_b32 v3, v4, v6, v3
1531; GFX9-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
1532; GFX9-NEXT:    s_waitcnt vmcnt(0)
1533; GFX9-NEXT:    ; return to shader part epilog
1534;
1535; GFX10-LABEL: atomic_add_i64_2darraymsaa:
1536; GFX10:       ; %bb.0: ; %main_body
1537; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
1538; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1539; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1540; GFX10-NEXT:    s_mov_b32 s0, s2
1541; GFX10-NEXT:    s_mov_b32 s1, s3
1542; GFX10-NEXT:    s_mov_b32 s2, s4
1543; GFX10-NEXT:    v_and_or_b32 v2, v2, v6, v3
1544; GFX10-NEXT:    v_and_or_b32 v3, v4, v6, v5
1545; GFX10-NEXT:    s_mov_b32 s3, s5
1546; GFX10-NEXT:    s_mov_b32 s4, s6
1547; GFX10-NEXT:    s_mov_b32 s5, s7
1548; GFX10-NEXT:    s_mov_b32 s6, s8
1549; GFX10-NEXT:    s_mov_b32 s7, s9
1550; GFX10-NEXT:    ; implicit-def: $vcc_hi
1551; GFX10-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
1552; GFX10-NEXT:    s_waitcnt vmcnt(0)
1553; GFX10-NEXT:    ; return to shader part epilog
1554main_body:
1555  %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1556  %out = bitcast i64 %v to <2 x float>
1557  ret <2 x float> %out
1558}
1559
; Image atomic add, 1D dimension, i64 data, one 16-bit coordinate, with the
; last immediate operand of the intrinsic set to 2 instead of 0. The expected
; instruction on both targets carries slc in addition to glc and a16; the
; single coordinate is used directly from v2, so no packing code is expected.
define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_add_i64_1d_slc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_1d_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  ; The final immediate is 2 here; presumably this is the cache-policy operand
  ; that selects slc -- confirm against the intrinsic definition.
  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ; Reinterpret the returned i64 as two floats to match the shader return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}
1594
; Declarations of the image atomic intrinsics with i16 coordinates.
; Operand layout of each declaration: the data operand(s) first (cmpswap takes
; two), then one i16 operand per coordinate, then the <8 x i32> operand, then
; two i32 operands that must be immediates (immarg).

; i32 data variants.
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

; i64 data variants, mirroring the i32 list above one for one.
declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

; Attribute group referenced by every declaration above.
attributes #0 = { nounwind }
1638