; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; load_1d: 1D image load returning <4 x float>, dmask 15, both trailing i32
; operands zero. The checks expect one image_load writing v[0:3], addressed
; by v0, with %rsrc in s[0:7] and nothing after "unorm".
; GCN-LABEL: {{^}}load_1d:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; load_1d_lwe: 1D image load through the struct-returning intrinsic variant
; with the second-to-last operand set to 2. The checks expect the "lwe"
; modifier and a five-register result v[0:4]; the address VGPR is matched by
; regex because %out occupies SGPRs ahead of it and the exact VGPR is not
; pinned by the test. The i32 struct member is stored to %out so it stays live.
; GCN-LABEL: {{^}}load_1d_lwe:
; GCN: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; load_2d: 2D image load with two i32 coordinates (%s, %t) and dmask 15.
; The checks expect the coordinates as the register pair v[0:1] and the
; result in v[0:3], with no modifiers after "unorm".
; GCN-LABEL: {{^}}load_2d:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; load_3d: 3D image load with three i32 coordinates (%s, %t, %r) and dmask 15.
; The checks expect the coordinates in v[0:2] and the result in v[0:3], with
; no modifiers after "unorm".
; GCN-LABEL: {{^}}load_3d:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; load_cube: cube image load with three i32 coordinates (%s, %t, %slice) and
; dmask 15. The checks expect the coordinates in v[0:2], the result in
; v[0:3], and the "da" modifier as the last token on the line.
; GCN-LABEL: {{^}}load_cube:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; load_cube_lwe: cube image load through the struct-returning intrinsic
; variant with the second-to-last operand set to 2. The checks expect both
; "lwe" and "da" and a five-register result v[0:4]; the address register
; range is matched by regex rather than pinned. The i32 struct member is
; stored to %out so it stays live.
; GCN-LABEL: {{^}}load_cube_lwe:
; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; load_1darray: 1D-array image load with two i32 coordinates (%s, %slice)
; and dmask 15. The checks expect the coordinates in v[0:1], the result in
; v[0:3], and the "da" modifier as the last token on the line.
; GCN-LABEL: {{^}}load_1darray:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; load_2darray: 2D-array image load with three i32 coordinates
; (%s, %t, %slice) and dmask 15. The checks expect the coordinates in
; v[0:2], the result in v[0:3], and the "da" modifier last on the line.
; GCN-LABEL: {{^}}load_2darray:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; load_2darray_lwe: 2D-array image load through the struct-returning
; intrinsic variant with the second-to-last operand set to 2. The checks
; expect both "lwe" and "da" and a five-register result v[0:4]; the address
; register range is matched by regex rather than pinned. The i32 struct
; member is stored to %out so it stays live.
; GCN-LABEL: {{^}}load_2darray_lwe:
; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; load_2dmsaa: 2D multisample image load with three i32 address operands
; (%s, %t, %fragid) and dmask 15. The checks expect the address in v[0:2],
; the result in v[0:3], and no modifiers after "unorm" (in particular no "da").
; GCN-LABEL: {{^}}load_2dmsaa:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; load_2darraymsaa: 2D-array multisample image load with four i32 address
; operands (%s, %t, %slice, %fragid) and dmask 15. The checks expect the
; address in v[0:3], the result in v[0:3], and the "da" modifier last.
; GCN-LABEL: {{^}}load_2darraymsaa:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; store_1d: 1D image store of a <4 x float> with dmask 15 and both trailing
; i32 operands zero. %vdata precedes %s in the argument list, so the checks
; expect the data in v[0:3] and the coordinate in v4, with no modifiers
; after "unorm".
; GCN-LABEL: {{^}}store_1d:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_2d: 2D image store of a <4 x float> with dmask 15. The checks expect
; the data in v[0:3] and the two coordinates (%s, %t) in v[4:5], with no
; modifiers after "unorm".
; GCN-LABEL: {{^}}store_2d:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
main_body:
  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_3d: 3D image store of a <4 x float> with dmask 15. The checks expect
; the data in v[0:3] and the three coordinates (%s, %t, %r) in v[4:6], with
; no modifiers after "unorm".
; GCN-LABEL: {{^}}store_3d:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
main_body:
  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_cube: cube image store of a <4 x float> with dmask 15. The checks
; expect the data in v[0:3], the three coordinates (%s, %t, %slice) in
; v[4:6], and the "da" modifier as the last token on the line.
; GCN-LABEL: {{^}}store_cube:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_1darray: 1D-array image store of a <4 x float> with dmask 15. The
; checks expect the data in v[0:3], the two coordinates (%s, %slice) in
; v[4:5], and the "da" modifier as the last token on the line.
; GCN-LABEL: {{^}}store_1darray:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_2darray: 2D-array image store of a <4 x float> with dmask 15. The
; checks expect the data in v[0:3], the three coordinates (%s, %t, %slice)
; in v[4:6], and the "da" modifier as the last token on the line.
; GCN-LABEL: {{^}}store_2darray:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_2dmsaa: 2D multisample image store of a <4 x float> with dmask 15.
; The checks expect the data in v[0:3], the three address operands
; (%s, %t, %fragid) in v[4:6], and no modifiers after "unorm".
; GCN-LABEL: {{^}}store_2dmsaa:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_2darraymsaa: 2D-array multisample image store of a <4 x float> with
; dmask 15. The checks expect the data in v[0:3], the four address operands
; (%s, %t, %slice, %fragid) in v[4:7], and the "da" modifier last.
; GCN-LABEL: {{^}}store_2darraymsaa:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; load_1d_V1: 1D image load returning a single float with dmask 8. The
; checks expect a one-register result (v0) and dmask:0x8 on the instruction.
; GCN-LABEL: {{^}}load_1d_V1:
; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; load_1d_V2: 1D image load returning <2 x float> with dmask 9. The checks
; expect a two-register result v[0:1] and dmask:0x9 on the instruction.
; GCN-LABEL: {{^}}load_1d_V2:
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; store_1d_V1: 1D image store of a single float with dmask 2. The checks
; expect the data in v0, the coordinate in v1, and dmask:0x2.
; GCN-LABEL: {{^}}store_1d_V1:
; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; store_1d_V2: 1D image store of a <2 x float> with dmask 12. The checks
; expect the data in v[0:1], the coordinate in v2, and dmask:0xc.
; GCN-LABEL: {{^}}store_1d_V2:
; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; load_1d_glc: same load as the plain 1D case but with the final i32 operand
; set to 1. The checks expect exactly "glc" after "unorm".
; GCN-LABEL: {{^}}load_1d_glc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

; load_1d_slc: same load as the plain 1D case but with the final i32 operand
; set to 2. The checks expect exactly "slc" after "unorm".
; GCN-LABEL: {{^}}load_1d_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

; load_1d_glc_slc: same load as the plain 1D case but with the final i32
; operand set to 3. The checks expect "glc slc", in that order, after "unorm".
; GCN-LABEL: {{^}}load_1d_glc_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

; store_1d_glc: same store as the plain 1D case but with the final i32
; operand set to 1. The checks expect exactly "glc" after "unorm".
; GCN-LABEL: {{^}}store_1d_glc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

; store_1d_slc: same store as the plain 1D case but with the final i32
; operand set to 2. The checks expect exactly "slc" after "unorm".
; GCN-LABEL: {{^}}store_1d_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

; store_1d_glc_slc: same store as the plain 1D case but with the final i32
; operand set to 3. The checks expect "glc slc", in that order, after "unorm".
; GCN-LABEL: {{^}}store_1d_glc_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}

; image_store_wait: store -> load -> store sequence using three different
; resources (%arg, %arg1, %arg2 in s[0:7], s[8:15], s[16:23]) and the same
; coordinate %arg4. The second store writes the value produced by the load,
; and the checks expect an "s_waitcnt vmcnt(0)" between that load and the
; final store.
; NOTE(review): the SI-prefixed directive below is only honoured when a RUN
; line enables that prefix; the RUN line visible in this file enables a single
; prefix, so that directive is currently inert. It is kept unchanged.
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf
; SI: s_waitcnt expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
  ret void
}

; image_load_mmo: a 2D image load of one float (dmask 1) placed between two
; stores of 0.0 to LDS (%lds and %lds + 4 floats). The checks only pin the
; image_load itself: result in v1, coordinates in v[2:3], %rsrc in s[0:7].
; NOTE(review): presumably this guards the memory operand attached to the
; image load relative to the surrounding LDS stores; the checks here do not
; verify the stores, so confirm intent before extending.
; The label pattern is anchored with {{^}} and the trailing colon, matching
; the form used by every other label check in this file.
; GCN-LABEL: {{^}}image_load_mmo:
; GCN: image_load v1, v[2:3], s[0:7] dmask:0x1 unorm
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
  store float 0.000000e+00, float addrspace(3)* %lds
  %c0 = extractelement <2 x i32> %c, i32 0
  %c1 = extractelement <2 x i32> %c, i32 1
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.000000e+00, float addrspace(3)* %tmp2
  ret float %tex
}

; Image load intrinsics returning <4 x float>, plus struct-returning variants
; whose extra i32 member carries the value the *_lwe tests store to %out.
; All are tagged with attribute group #1.
; NOTE(review): the 1d.f32i32, 1d.v2f32i32, 2d.v4f32i32, 3d.v4f32i32,
; 1darray.v4f32i32, 2dmsaa.v4f32i32 and 2darraymsaa.v4f32i32 declarations are
; not called by any function visible in this file; they are kept as-is.
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

; Image store intrinsics taking a <4 x float> data operand first; all are
; tagged with attribute group #0.
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

; Scalar and two-element variants used by the *_V1, *_V2 and image_load_mmo
; tests.
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0

; Attribute groups. #0 is attached to the store intrinsic declarations and to
; the image_store_wait and image_load_mmo definitions; #1 is attached to the
; load intrinsic declarations.
; NOTE(review): nothing visible in this file references #2; it is kept as-is.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }