1; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
2; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
4
5; GCN-LABEL: {{^}}load_1d:
6; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
7define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
8main_body:
9  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
10  ret <4 x float> %v
11}
12
13; GCN-LABEL: {{^}}load_2d:
14; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
15define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
16main_body:
17  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
18  ret <4 x float> %v
19}
20
21; GCN-LABEL: {{^}}load_3d:
22; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
23define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
24main_body:
25  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
26  ret <4 x float> %v
27}
28
29; GCN-LABEL: {{^}}load_cube:
30; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
31define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
32main_body:
33  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
34  ret <4 x float> %v
35}
36
37; GCN-LABEL: {{^}}load_1darray:
38; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
39define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
40main_body:
41  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
42  ret <4 x float> %v
43}
44
45; GCN-LABEL: {{^}}load_2darray:
46; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
47define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
48main_body:
49  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
50  ret <4 x float> %v
51}
52
53; GCN-LABEL: {{^}}load_2dmsaa:
54; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
55define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
56main_body:
57  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
58  ret <4 x float> %v
59}
60
61; GCN-LABEL: {{^}}load_2darraymsaa:
62; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
63define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
64main_body:
65  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
66  ret <4 x float> %v
67}
68
69; GCN-LABEL: {{^}}load_mip_1d:
70; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
71define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
72main_body:
73  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
74  ret <4 x float> %v
75}
76
77; GCN-LABEL: {{^}}load_mip_2d:
78; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
79define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
80main_body:
81  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
82  ret <4 x float> %v
83}
84
85; GCN-LABEL: {{^}}load_mip_3d:
86; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
87define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
88main_body:
89  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
90  ret <4 x float> %v
91}
92
93; GCN-LABEL: {{^}}load_mip_cube:
94; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
95define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
96main_body:
97  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
98  ret <4 x float> %v
99}
100
101; GCN-LABEL: {{^}}load_mip_1darray:
102; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
103define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
104main_body:
105  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
106  ret <4 x float> %v
107}
108
109; GCN-LABEL: {{^}}load_mip_2darray:
110; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
111define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
112main_body:
113  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
114  ret <4 x float> %v
115}
116
117; GCN-LABEL: {{^}}store_1d:
118; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
119define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
120main_body:
121  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
122  ret void
123}
124
125; GCN-LABEL: {{^}}store_2d:
126; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
127define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
128main_body:
129  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
130  ret void
131}
132
133; GCN-LABEL: {{^}}store_3d:
134; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
135define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
136main_body:
137  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
138  ret void
139}
140
141; GCN-LABEL: {{^}}store_cube:
142; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
143define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
144main_body:
145  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
146  ret void
147}
148
149; GCN-LABEL: {{^}}store_1darray:
150; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
151define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
152main_body:
153  call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
154  ret void
155}
156
157; GCN-LABEL: {{^}}store_2darray:
158; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
159define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
160main_body:
161  call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
162  ret void
163}
164
165; GCN-LABEL: {{^}}store_2dmsaa:
166; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
167define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
168main_body:
169  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
170  ret void
171}
172
173; GCN-LABEL: {{^}}store_2darraymsaa:
174; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
175define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
176main_body:
177  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
178  ret void
179}
180
181; GCN-LABEL: {{^}}store_mip_1d:
182; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
183define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
184main_body:
185  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
186  ret void
187}
188
189; GCN-LABEL: {{^}}store_mip_2d:
190; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
191define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
192main_body:
193  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
194  ret void
195}
196
197; GCN-LABEL: {{^}}store_mip_3d:
198; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
199define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
200main_body:
201  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
202  ret void
203}
204
205; GCN-LABEL: {{^}}store_mip_cube:
206; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
207define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
208main_body:
209  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
210  ret void
211}
212
213; GCN-LABEL: {{^}}store_mip_1darray:
214; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
215define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
216main_body:
217  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
218  ret void
219}
220
221; GCN-LABEL: {{^}}store_mip_2darray:
222; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
223define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
224main_body:
225  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
226  ret void
227}
228
229; GCN-LABEL: {{^}}getresinfo_1d:
230; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
231define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
232main_body:
233  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
234  ret <4 x float> %v
235}
236
237; GCN-LABEL: {{^}}getresinfo_2d:
238; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
239define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
240main_body:
241  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
242  ret <4 x float> %v
243}
244
245; GCN-LABEL: {{^}}getresinfo_3d:
246; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
247define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
248main_body:
249  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
250  ret <4 x float> %v
251}
252
253; GCN-LABEL: {{^}}getresinfo_cube:
254; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
255define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
256main_body:
257  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
258  ret <4 x float> %v
259}
260
261; GCN-LABEL: {{^}}getresinfo_1darray:
262; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
263define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
264main_body:
265  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
266  ret <4 x float> %v
267}
268
269; GCN-LABEL: {{^}}getresinfo_2darray:
270; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
271define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
272main_body:
273  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
274  ret <4 x float> %v
275}
276
277; GCN-LABEL: {{^}}getresinfo_2dmsaa:
278; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
279define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
280main_body:
281  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
282  ret <4 x float> %v
283}
284
285; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
286; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
287define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
288main_body:
289  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
290  ret <4 x float> %v
291}
292
293; GCN-LABEL: {{^}}load_1d_V1:
294; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
295define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
296main_body:
297  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
298  ret float %v
299}
300
301; GCN-LABEL: {{^}}load_1d_V2:
302; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
303define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
304main_body:
305  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
306  ret <2 x float> %v
307}
308
309; GCN-LABEL: {{^}}store_1d_V1:
310; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
311define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
312main_body:
313  call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
314  ret void
315}
316
317; GCN-LABEL: {{^}}store_1d_V2:
318; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
319define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
320main_body:
321  call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
322  ret void
323}
324
325; GCN-LABEL: {{^}}load_1d_glc:
326; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
327define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
328main_body:
329  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
330  ret <4 x float> %v
331}
332
333; GCN-LABEL: {{^}}load_1d_slc:
334; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
335define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
336main_body:
337  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
338  ret <4 x float> %v
339}
340
341; GCN-LABEL: {{^}}load_1d_glc_slc:
342; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
343define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
344main_body:
345  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
346  ret <4 x float> %v
347}
348
349; GCN-LABEL: {{^}}store_1d_glc:
350; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
351define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
352main_body:
353  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
354  ret void
355}
356
357; GCN-LABEL: {{^}}store_1d_slc:
358; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
359define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
360main_body:
361  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
362  ret void
363}
364
365; GCN-LABEL: {{^}}store_1d_glc_slc:
366; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
367define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
368main_body:
369  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
370  ret void
371}
372
373; GCN-LABEL: {{^}}getresinfo_dmask0:
374; GCN-NOT: image
375; GCN: ; return to shader part epilog
376define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
377main_body:
378  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
379  ret <4 x float> %r
380}
381
382; Ideally, the register allocator would avoid the wait here
383;
384; GCN-LABEL: {{^}}image_store_wait:
385; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
386; SI: s_waitcnt expcnt(0)
387; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
388; GCN: s_waitcnt vmcnt(0)
389; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
390define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
391main_body:
392  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
393  %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
394  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
395  ret void
396}
397
398; SI won't merge ds memory operations, because of the signed offset bug, so
399; we only have check lines for VI.
400; VI-LABEL: image_load_mmo
401; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
402; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
403define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
404  store float 0.000000e+00, float addrspace(3)* %lds
405  %c0 = extractelement <2 x i32> %c, i32 0
406  %c1 = extractelement <2 x i32> %c, i32 1
407  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
408  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
409  store float 0.000000e+00, float addrspace(3)* %tmp2
410  ret float %tex
411}
412
413declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
414declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
415declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
416declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
417declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
418declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
419declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
420declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
421
422declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
423declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
424declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
425declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
426declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
427declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
428
429declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
430declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
431declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
432declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
433declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
434declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
435declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
436declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
437
438declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
439declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
440declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
441declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
442declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
443declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
444
445declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
446declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
447declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
448declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
449declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
450declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
451declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
452declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
453
454declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
455declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
456declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
457declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
458declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
459
460attributes #0 = { nounwind }
461attributes #1 = { nounwind readonly }
462attributes #2 = { nounwind readnone }
463