1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4
; Basic gather4 on a 2D image with two half-typed coordinates (%s, %t).
; Expected on both gfx900 and gfx1010: the two halves are packed into one VGPR
; (v_and_b32 with 0xffff, then v_lshl_or_b32 by 16) and image_gather4 carries
; the a16 modifier with a single address register. The checks also expect exec
; to be saved, run through s_wqm, and ANDed with the saved mask again before
; the gather is issued. The leading "i32 1" operand of the call appears as
; dmask:0x1 in the expected output.
5define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
6; GFX9-LABEL: gather4_2d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    s_mov_b64 s[12:13], exec
9; GFX9-NEXT:    s_wqm_b64 exec, exec
10; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
11; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
12; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
13; GFX9-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
14; GFX9-NEXT:    s_waitcnt vmcnt(0)
15; GFX9-NEXT:    ; return to shader part epilog
16;
17; GFX10-LABEL: gather4_2d:
18; GFX10:       ; %bb.0: ; %main_body
19; GFX10-NEXT:    s_mov_b32 s12, exec_lo
20; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
21; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
22; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
23; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
24; GFX10-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
25; GFX10-NEXT:    s_waitcnt vmcnt(0)
26; GFX10-NEXT:    ; return to shader part epilog
27main_body:
28  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
29  ret <4 x float> %v
30}
31
; gather4 on a cube image with three half-typed coordinates (%s, %t, %face).
; Expected output packs the first two halves into v1 and passes v[1:2] as the
; address, so the third half is used as its own dword (presumably still in v2,
; where the third VGPR argument arrives -- confirm against the calling
; convention). The gfx900 checks expect the "da" modifier next to a16; the
; gfx1010 checks instead expect dim:SQ_RSRC_IMG_CUBE.
32define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
33; GFX9-LABEL: gather4_cube:
34; GFX9:       ; %bb.0: ; %main_body
35; GFX9-NEXT:    s_mov_b64 s[12:13], exec
36; GFX9-NEXT:    s_wqm_b64 exec, exec
37; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
38; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
39; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
40; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
41; GFX9-NEXT:    s_waitcnt vmcnt(0)
42; GFX9-NEXT:    ; return to shader part epilog
43;
44; GFX10-LABEL: gather4_cube:
45; GFX10:       ; %bb.0: ; %main_body
46; GFX10-NEXT:    s_mov_b32 s12, exec_lo
47; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
48; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
49; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
50; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
51; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
52; GFX10-NEXT:    s_waitcnt vmcnt(0)
53; GFX10-NEXT:    ; return to shader part epilog
54main_body:
55  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
56  ret <4 x float> %v
57}
58
; gather4 on a 2D array image with three half-typed coordinates
; (%s, %t, %slice). The expected code has the same shape as the cube case in
; this file: the first two halves are packed into v1 and v[1:2] is the address
; pair. The gfx900 checks expect "a16 da"; the gfx1010 checks expect
; dim:SQ_RSRC_IMG_2D_ARRAY with a16.
59define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
60; GFX9-LABEL: gather4_2darray:
61; GFX9:       ; %bb.0: ; %main_body
62; GFX9-NEXT:    s_mov_b64 s[12:13], exec
63; GFX9-NEXT:    s_wqm_b64 exec, exec
64; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
65; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
66; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
67; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
68; GFX9-NEXT:    s_waitcnt vmcnt(0)
69; GFX9-NEXT:    ; return to shader part epilog
70;
71; GFX10-LABEL: gather4_2darray:
72; GFX10:       ; %bb.0: ; %main_body
73; GFX10-NEXT:    s_mov_b32 s12, exec_lo
74; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
75; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
76; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
77; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
78; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
79; GFX10-NEXT:    s_waitcnt vmcnt(0)
80; GFX10-NEXT:    ; return to shader part epilog
81main_body:
82  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
83  ret <4 x float> %v
84}
85
; gather4.c (depth-compare variant) on a 2D image: a float %zcompare operand
; followed by two half coordinates. The expected code leaves v0 untouched,
; packs the two halves into v1, and passes v[0:1] to image_gather4_c with a16.
; The exec save / s_wqm / s_and sequence is expected on both targets.
86define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
87; GFX9-LABEL: gather4_c_2d:
88; GFX9:       ; %bb.0: ; %main_body
89; GFX9-NEXT:    s_mov_b64 s[12:13], exec
90; GFX9-NEXT:    s_wqm_b64 exec, exec
91; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
92; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
93; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
94; GFX9-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
95; GFX9-NEXT:    s_waitcnt vmcnt(0)
96; GFX9-NEXT:    ; return to shader part epilog
97;
98; GFX10-LABEL: gather4_c_2d:
99; GFX10:       ; %bb.0: ; %main_body
100; GFX10-NEXT:    s_mov_b32 s12, exec_lo
101; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
102; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
103; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
104; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
105; GFX10-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
106; GFX10-NEXT:    s_waitcnt vmcnt(0)
107; GFX10-NEXT:    ; return to shader part epilog
108main_body:
109  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
110  ret <4 x float> %v
111}
112
; gather4.cl on a 2D image with three half operands (%s, %t, %clamp).
; The expected code packs the first two halves into v1 and passes v[1:2] to
; image_gather4_cl with a16, so the third half is not packed with the
; coordinates. No "da" modifier is expected on gfx900 for this 2D case.
113define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
114; GFX9-LABEL: gather4_cl_2d:
115; GFX9:       ; %bb.0: ; %main_body
116; GFX9-NEXT:    s_mov_b64 s[12:13], exec
117; GFX9-NEXT:    s_wqm_b64 exec, exec
118; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
119; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
120; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
121; GFX9-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
122; GFX9-NEXT:    s_waitcnt vmcnt(0)
123; GFX9-NEXT:    ; return to shader part epilog
124;
125; GFX10-LABEL: gather4_cl_2d:
126; GFX10:       ; %bb.0: ; %main_body
127; GFX10-NEXT:    s_mov_b32 s12, exec_lo
128; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
129; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
130; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
131; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
132; GFX10-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
133; GFX10-NEXT:    s_waitcnt vmcnt(0)
134; GFX10-NEXT:    ; return to shader part epilog
135main_body:
136  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
137  ret <4 x float> %v
138}
139
; gather4.c.cl on a 2D image: float %zcompare, then half %s, %t, %clamp.
; The two targets are expected to differ in how the three address dwords are
; supplied: the gfx900 checks expect v_mov copies that assemble a contiguous
; v[3:5] range, while the gfx1010 checks expect no copies and a bracketed,
; non-contiguous register list [v0, v1, v3].
140define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
141; GFX9-LABEL: gather4_c_cl_2d:
142; GFX9:       ; %bb.0: ; %main_body
143; GFX9-NEXT:    s_mov_b64 s[12:13], exec
144; GFX9-NEXT:    s_wqm_b64 exec, exec
145; GFX9-NEXT:    v_mov_b32_e32 v5, v3
146; GFX9-NEXT:    v_mov_b32_e32 v3, v0
147; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
148; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
149; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
150; GFX9-NEXT:    image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
151; GFX9-NEXT:    s_waitcnt vmcnt(0)
152; GFX9-NEXT:    ; return to shader part epilog
153;
154; GFX10-LABEL: gather4_c_cl_2d:
155; GFX10:       ; %bb.0: ; %main_body
156; GFX10-NEXT:    s_mov_b32 s12, exec_lo
157; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
158; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
159; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
160; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
161; GFX10-NEXT:    image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
162; GFX10-NEXT:    s_waitcnt vmcnt(0)
163; GFX10-NEXT:    ; return to shader part epilog
164main_body:
165  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
166  ret <4 x float> %v
167}
168
; gather4.b on a 2D image: a float %bias operand followed by two half
; coordinates (the intrinsic name carries both an .f32 and an .f16 suffix).
; The expected code keeps v0 as is, packs the halves into v1, and passes
; v[0:1] to image_gather4_b with a16 on both targets.
169define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
170; GFX9-LABEL: gather4_b_2d:
171; GFX9:       ; %bb.0: ; %main_body
172; GFX9-NEXT:    s_mov_b64 s[12:13], exec
173; GFX9-NEXT:    s_wqm_b64 exec, exec
174; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
175; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
176; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
177; GFX9-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
178; GFX9-NEXT:    s_waitcnt vmcnt(0)
179; GFX9-NEXT:    ; return to shader part epilog
180;
181; GFX10-LABEL: gather4_b_2d:
182; GFX10:       ; %bb.0: ; %main_body
183; GFX10-NEXT:    s_mov_b32 s12, exec_lo
184; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
185; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
186; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
187; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
188; GFX10-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
189; GFX10-NEXT:    s_waitcnt vmcnt(0)
190; GFX10-NEXT:    ; return to shader part epilog
191main_body:
192  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
193  ret <4 x float> %v
194}
195
; gather4.c.b on a 2D image: float %bias and float %zcompare, then two half
; coordinates. The expected code leaves v0 and v1 untouched, packs the halves
; into v2, and passes the contiguous range v[0:2] to image_gather4_c_b with
; a16 on both targets (no extra copies are expected).
196define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
197; GFX9-LABEL: gather4_c_b_2d:
198; GFX9:       ; %bb.0: ; %main_body
199; GFX9-NEXT:    s_mov_b64 s[12:13], exec
200; GFX9-NEXT:    s_wqm_b64 exec, exec
201; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
202; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
203; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
204; GFX9-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
205; GFX9-NEXT:    s_waitcnt vmcnt(0)
206; GFX9-NEXT:    ; return to shader part epilog
207;
208; GFX10-LABEL: gather4_c_b_2d:
209; GFX10:       ; %bb.0: ; %main_body
210; GFX10-NEXT:    s_mov_b32 s12, exec_lo
211; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
212; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
213; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
214; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
215; GFX10-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
216; GFX10-NEXT:    s_waitcnt vmcnt(0)
217; GFX10-NEXT:    ; return to shader part epilog
218main_body:
219  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
220  ret <4 x float> %v
221}
222
; gather4.b.cl on a 2D image: float %bias, then half %s, %t, %clamp.
; As in the c.cl case in this file, the gfx900 checks expect v_mov copies that
; build a contiguous v[3:5] address range, while the gfx1010 checks expect the
; bracketed register list [v0, v1, v3] with no copies.
223define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
224; GFX9-LABEL: gather4_b_cl_2d:
225; GFX9:       ; %bb.0: ; %main_body
226; GFX9-NEXT:    s_mov_b64 s[12:13], exec
227; GFX9-NEXT:    s_wqm_b64 exec, exec
228; GFX9-NEXT:    v_mov_b32_e32 v5, v3
229; GFX9-NEXT:    v_mov_b32_e32 v3, v0
230; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
231; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
232; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
233; GFX9-NEXT:    image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
234; GFX9-NEXT:    s_waitcnt vmcnt(0)
235; GFX9-NEXT:    ; return to shader part epilog
236;
237; GFX10-LABEL: gather4_b_cl_2d:
238; GFX10:       ; %bb.0: ; %main_body
239; GFX10-NEXT:    s_mov_b32 s12, exec_lo
240; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
241; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
242; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
243; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
244; GFX10-NEXT:    image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
245; GFX10-NEXT:    s_waitcnt vmcnt(0)
246; GFX10-NEXT:    ; return to shader part epilog
247main_body:
248  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
249  ret <4 x float> %v
250}
251
; gather4.c.b.cl on a 2D image: float %bias, float %zcompare, then half
; %s, %t, %clamp -- the largest operand list among the tests in this file.
; The gfx900 checks expect three v_mov copies plus the pack to assemble a
; contiguous v[4:7] address range; the gfx1010 checks expect only the pack
; and the bracketed register list [v0, v1, v2, v4].
252define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
253; GFX9-LABEL: gather4_c_b_cl_2d:
254; GFX9:       ; %bb.0: ; %main_body
255; GFX9-NEXT:    s_mov_b64 s[12:13], exec
256; GFX9-NEXT:    s_wqm_b64 exec, exec
257; GFX9-NEXT:    v_mov_b32_e32 v7, v4
258; GFX9-NEXT:    v_mov_b32_e32 v4, v0
259; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
260; GFX9-NEXT:    v_mov_b32_e32 v5, v1
261; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
262; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
263; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
264; GFX9-NEXT:    s_waitcnt vmcnt(0)
265; GFX9-NEXT:    ; return to shader part epilog
266;
267; GFX10-LABEL: gather4_c_b_cl_2d:
268; GFX10:       ; %bb.0: ; %main_body
269; GFX10-NEXT:    s_mov_b32 s12, exec_lo
270; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
271; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
272; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
273; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
274; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
275; GFX10-NEXT:    s_waitcnt vmcnt(0)
276; GFX10-NEXT:    ; return to shader part epilog
277main_body:
278  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
279  ret <4 x float> %v
280}
281
; gather4.l on a 2D image with three half operands (%s, %t, %lod).
; Unlike the non-l/lz tests earlier in this file, the expected output contains
; no exec save / s_wqm / s_and sequence: only the pack of the first two halves
; into v1 followed by image_gather4_l on v[1:2] with a16.
282define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
283; GFX9-LABEL: gather4_l_2d:
284; GFX9:       ; %bb.0: ; %main_body
285; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
286; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
287; GFX9-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
288; GFX9-NEXT:    s_waitcnt vmcnt(0)
289; GFX9-NEXT:    ; return to shader part epilog
290;
291; GFX10-LABEL: gather4_l_2d:
292; GFX10:       ; %bb.0: ; %main_body
293; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
294; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
295; GFX10-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
296; GFX10-NEXT:    s_waitcnt vmcnt(0)
297; GFX10-NEXT:    ; return to shader part epilog
298main_body:
299  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
300  ret <4 x float> %v
301}
302
; gather4.c.l on a 2D image: float %zcompare, then half %s, %t, %lod.
; No exec / s_wqm manipulation is expected. The gfx900 checks expect v_mov
; copies that build a contiguous v[3:5] address range; the gfx1010 checks
; expect only the pack and the bracketed register list [v0, v1, v3].
303define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
304; GFX9-LABEL: gather4_c_l_2d:
305; GFX9:       ; %bb.0: ; %main_body
306; GFX9-NEXT:    v_mov_b32_e32 v5, v3
307; GFX9-NEXT:    v_mov_b32_e32 v3, v0
308; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
309; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
310; GFX9-NEXT:    image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
311; GFX9-NEXT:    s_waitcnt vmcnt(0)
312; GFX9-NEXT:    ; return to shader part epilog
313;
314; GFX10-LABEL: gather4_c_l_2d:
315; GFX10:       ; %bb.0: ; %main_body
316; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
317; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
318; GFX10-NEXT:    image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
319; GFX10-NEXT:    s_waitcnt vmcnt(0)
320; GFX10-NEXT:    ; return to shader part epilog
321main_body:
322  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
323  ret <4 x float> %v
324}
325
; gather4.lz on a 2D image with two half coordinates (%s, %t).
; The expected output is the shortest in this file: pack the two halves into
; v0 and issue image_gather4_lz with a single address register and a16.
; No exec / s_wqm manipulation is expected on either target.
326define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
327; GFX9-LABEL: gather4_lz_2d:
328; GFX9:       ; %bb.0: ; %main_body
329; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
330; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
331; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
332; GFX9-NEXT:    s_waitcnt vmcnt(0)
333; GFX9-NEXT:    ; return to shader part epilog
334;
335; GFX10-LABEL: gather4_lz_2d:
336; GFX10:       ; %bb.0: ; %main_body
337; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
338; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
339; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
340; GFX10-NEXT:    s_waitcnt vmcnt(0)
341; GFX10-NEXT:    ; return to shader part epilog
342main_body:
343  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
344  ret <4 x float> %v
345}
346
; gather4.c.lz on a 2D image: float %zcompare followed by two half
; coordinates. The expected code leaves v0 untouched, packs the halves into
; v1, and passes v[0:1] to image_gather4_c_lz with a16. No exec / s_wqm
; manipulation is expected on either target.
347define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
348; GFX9-LABEL: gather4_c_lz_2d:
349; GFX9:       ; %bb.0: ; %main_body
350; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
351; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
352; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
353; GFX9-NEXT:    s_waitcnt vmcnt(0)
354; GFX9-NEXT:    ; return to shader part epilog
355;
356; GFX10-LABEL: gather4_c_lz_2d:
357; GFX10:       ; %bb.0: ; %main_body
358; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
359; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
360; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
361; GFX10-NEXT:    s_waitcnt vmcnt(0)
362; GFX10-NEXT:    ; return to shader part epilog
363main_body:
364  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
365  ret <4 x float> %v
366}
367
; Declarations of the gather4 image intrinsics called by the tests in this
; file. Every declaration returns <4 x float>, takes an i32 first operand, the
; variant-specific float/half operands, an <8 x i32> and a <4 x i32> operand,
; and a trailing (i1, i32, i32) triple; all are tagged with attribute group #1.
; Groups below: base dimensions, c/cl variants, b variants, l variants,
; lz variants.
368declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
369declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
370declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
371
372declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
373declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
374declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
375
376declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
377declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
378declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
379declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
380
381declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
382declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
383
384declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
385declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
386
; Attribute groups. In the lines visible here only #1 is referenced (by the
; intrinsic declarations); #0 and #2 are defined but not attached to any
; function or declaration shown.
387attributes #0 = { nounwind }
388attributes #1 = { nounwind readonly }
389attributes #2 = { nounwind readnone }
390