1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s
4
; Exercises llvm.amdgcn.image.gather4.2d with two half (f16) coordinates.
; In the checked output v1 is shifted left by 16 and merged with the low 16
; bits of v0 (v_lshlrev_b32 + v_and_or_b32 with 0xffff), so one VGPR feeds
; image_gather4 with the a16 modifier. For gfx900 the 0xffff mask is first
; moved into a VGPR; for gfx1010 it is a literal operand of v_and_or_b32.
; The inreg %rsrc/%samp values are copied from s2-s13 down to s[0:7]/s[8:11].
; exec is saved, widened with s_wqm, and and-ed with the saved copy right
; before the gather.
5define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
6; GFX9-LABEL: gather4_2d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    s_mov_b64 s[14:15], exec
9; GFX9-NEXT:    s_mov_b32 s0, s2
10; GFX9-NEXT:    s_wqm_b64 exec, exec
11; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
12; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
13; GFX9-NEXT:    s_mov_b32 s1, s3
14; GFX9-NEXT:    s_mov_b32 s2, s4
15; GFX9-NEXT:    s_mov_b32 s3, s5
16; GFX9-NEXT:    s_mov_b32 s4, s6
17; GFX9-NEXT:    s_mov_b32 s5, s7
18; GFX9-NEXT:    s_mov_b32 s6, s8
19; GFX9-NEXT:    s_mov_b32 s7, s9
20; GFX9-NEXT:    s_mov_b32 s8, s10
21; GFX9-NEXT:    s_mov_b32 s9, s11
22; GFX9-NEXT:    s_mov_b32 s10, s12
23; GFX9-NEXT:    s_mov_b32 s11, s13
24; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
25; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
26; GFX9-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
27; GFX9-NEXT:    s_waitcnt vmcnt(0)
28; GFX9-NEXT:    ; return to shader part epilog
29;
30; GFX10NSA-LABEL: gather4_2d:
31; GFX10NSA:       ; %bb.0: ; %main_body
32; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
33; GFX10NSA-NEXT:    s_mov_b32 s0, s2
34; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
35; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
36; GFX10NSA-NEXT:    s_mov_b32 s1, s3
37; GFX10NSA-NEXT:    s_mov_b32 s2, s4
38; GFX10NSA-NEXT:    s_mov_b32 s3, s5
39; GFX10NSA-NEXT:    s_mov_b32 s4, s6
40; GFX10NSA-NEXT:    s_mov_b32 s5, s7
41; GFX10NSA-NEXT:    s_mov_b32 s6, s8
42; GFX10NSA-NEXT:    s_mov_b32 s7, s9
43; GFX10NSA-NEXT:    s_mov_b32 s8, s10
44; GFX10NSA-NEXT:    s_mov_b32 s9, s11
45; GFX10NSA-NEXT:    s_mov_b32 s10, s12
46; GFX10NSA-NEXT:    s_mov_b32 s11, s13
47; GFX10NSA-NEXT:    v_and_or_b32 v0, v0, 0xffff, v1
48; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
49; GFX10NSA-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
50; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
51; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
52; GFX10NSA-NEXT:    ; return to shader part epilog
53main_body:
54  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
55  ret <4 x float> %v
56}
57
; Exercises llvm.amdgcn.image.gather4.cube with three half (f16) address
; operands (%s, %t, %face). The checked output packs v0/v1 into one dword and
; keeps the low 16 bits of v2 in a second dword, so the gather reads v[0:1]
; with the a16 modifier. The gfx900 instruction carries "da"; the gfx1010 one
; carries dim:SQ_RSRC_IMG_CUBE. exec is saved, widened with s_wqm, and and-ed
; with the saved copy right before the gather.
; NOTE(review): the upper 16 bits of the second address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
58define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
59; GFX9-LABEL: gather4_cube:
60; GFX9:       ; %bb.0: ; %main_body
61; GFX9-NEXT:    s_mov_b64 s[14:15], exec
62; GFX9-NEXT:    s_mov_b32 s0, s2
63; GFX9-NEXT:    s_wqm_b64 exec, exec
64; GFX9-NEXT:    s_mov_b32 s2, s4
65; GFX9-NEXT:    s_mov_b32 s4, s6
66; GFX9-NEXT:    s_mov_b32 s6, s8
67; GFX9-NEXT:    s_mov_b32 s8, s10
68; GFX9-NEXT:    s_mov_b32 s10, s12
69; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
70; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
71; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
72; GFX9-NEXT:    s_mov_b32 s1, s3
73; GFX9-NEXT:    s_mov_b32 s3, s5
74; GFX9-NEXT:    s_mov_b32 s5, s7
75; GFX9-NEXT:    s_mov_b32 s7, s9
76; GFX9-NEXT:    s_mov_b32 s9, s11
77; GFX9-NEXT:    v_and_or_b32 v0, v0, v3, v1
78; GFX9-NEXT:    s_mov_b32 s11, s13
79; GFX9-NEXT:    v_and_or_b32 v1, v2, v3, s12
80; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
81; GFX9-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da
82; GFX9-NEXT:    s_waitcnt vmcnt(0)
83; GFX9-NEXT:    ; return to shader part epilog
84;
85; GFX10NSA-LABEL: gather4_cube:
86; GFX10NSA:       ; %bb.0: ; %main_body
87; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
88; GFX10NSA-NEXT:    s_mov_b32 s0, s2
89; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
90; GFX10NSA-NEXT:    v_mov_b32_e32 v3, 0xffff
91; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
92; GFX10NSA-NEXT:    s_mov_b32 s2, s4
93; GFX10NSA-NEXT:    s_mov_b32 s4, s6
94; GFX10NSA-NEXT:    s_mov_b32 s6, s8
95; GFX10NSA-NEXT:    s_mov_b32 s8, s10
96; GFX10NSA-NEXT:    s_mov_b32 s10, s12
97; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
98; GFX10NSA-NEXT:    s_mov_b32 s1, s3
99; GFX10NSA-NEXT:    s_mov_b32 s3, s5
100; GFX10NSA-NEXT:    s_mov_b32 s5, s7
101; GFX10NSA-NEXT:    s_mov_b32 s7, s9
102; GFX10NSA-NEXT:    s_mov_b32 s9, s11
103; GFX10NSA-NEXT:    v_and_or_b32 v0, v0, v3, v1
104; GFX10NSA-NEXT:    s_mov_b32 s11, s13
105; GFX10NSA-NEXT:    v_and_or_b32 v1, v2, v3, s12
106; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
107; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
108; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
109; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
110; GFX10NSA-NEXT:    ; return to shader part epilog
111main_body:
112  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
113  ret <4 x float> %v
114}
115
; Exercises llvm.amdgcn.image.gather4.2darray with three half (f16) address
; operands (%s, %t, %slice). The checked output packs v0/v1 into one dword
; and keeps the low 16 bits of v2 in a second dword, so the gather reads
; v[0:1] with the a16 modifier. The gfx900 instruction carries "da"; the
; gfx1010 one carries dim:SQ_RSRC_IMG_2D_ARRAY. exec is saved, widened with
; s_wqm, and and-ed with the saved copy right before the gather.
; NOTE(review): the upper 16 bits of the second address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
116define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
117; GFX9-LABEL: gather4_2darray:
118; GFX9:       ; %bb.0: ; %main_body
119; GFX9-NEXT:    s_mov_b64 s[14:15], exec
120; GFX9-NEXT:    s_mov_b32 s0, s2
121; GFX9-NEXT:    s_wqm_b64 exec, exec
122; GFX9-NEXT:    s_mov_b32 s2, s4
123; GFX9-NEXT:    s_mov_b32 s4, s6
124; GFX9-NEXT:    s_mov_b32 s6, s8
125; GFX9-NEXT:    s_mov_b32 s8, s10
126; GFX9-NEXT:    s_mov_b32 s10, s12
127; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
128; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
129; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
130; GFX9-NEXT:    s_mov_b32 s1, s3
131; GFX9-NEXT:    s_mov_b32 s3, s5
132; GFX9-NEXT:    s_mov_b32 s5, s7
133; GFX9-NEXT:    s_mov_b32 s7, s9
134; GFX9-NEXT:    s_mov_b32 s9, s11
135; GFX9-NEXT:    v_and_or_b32 v0, v0, v3, v1
136; GFX9-NEXT:    s_mov_b32 s11, s13
137; GFX9-NEXT:    v_and_or_b32 v1, v2, v3, s12
138; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
139; GFX9-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da
140; GFX9-NEXT:    s_waitcnt vmcnt(0)
141; GFX9-NEXT:    ; return to shader part epilog
142;
143; GFX10NSA-LABEL: gather4_2darray:
144; GFX10NSA:       ; %bb.0: ; %main_body
145; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
146; GFX10NSA-NEXT:    s_mov_b32 s0, s2
147; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
148; GFX10NSA-NEXT:    v_mov_b32_e32 v3, 0xffff
149; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
150; GFX10NSA-NEXT:    s_mov_b32 s2, s4
151; GFX10NSA-NEXT:    s_mov_b32 s4, s6
152; GFX10NSA-NEXT:    s_mov_b32 s6, s8
153; GFX10NSA-NEXT:    s_mov_b32 s8, s10
154; GFX10NSA-NEXT:    s_mov_b32 s10, s12
155; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
156; GFX10NSA-NEXT:    s_mov_b32 s1, s3
157; GFX10NSA-NEXT:    s_mov_b32 s3, s5
158; GFX10NSA-NEXT:    s_mov_b32 s5, s7
159; GFX10NSA-NEXT:    s_mov_b32 s7, s9
160; GFX10NSA-NEXT:    s_mov_b32 s9, s11
161; GFX10NSA-NEXT:    v_and_or_b32 v0, v0, v3, v1
162; GFX10NSA-NEXT:    s_mov_b32 s11, s13
163; GFX10NSA-NEXT:    v_and_or_b32 v1, v2, v3, s12
164; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
165; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
166; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
167; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
168; GFX10NSA-NEXT:    ; return to shader part epilog
169main_body:
170  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
171  ret <4 x float> %v
172}
173
; Exercises llvm.amdgcn.image.gather4.c.2d: a float %zcompare followed by two
; half (f16) coordinates. In the checked output v0 is passed through
; untouched, while v2 is shifted left by 16 and merged with the low 16 bits
; of v1, so image_gather4_c reads v[0:1] with the a16 modifier. For gfx900
; the 0xffff mask is first moved into a VGPR; for gfx1010 it is a literal
; operand of v_and_or_b32. exec is saved, widened with s_wqm, and and-ed with
; the saved copy right before the gather.
174define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
175; GFX9-LABEL: gather4_c_2d:
176; GFX9:       ; %bb.0: ; %main_body
177; GFX9-NEXT:    s_mov_b64 s[14:15], exec
178; GFX9-NEXT:    s_mov_b32 s0, s2
179; GFX9-NEXT:    s_wqm_b64 exec, exec
180; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
181; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
182; GFX9-NEXT:    s_mov_b32 s1, s3
183; GFX9-NEXT:    s_mov_b32 s2, s4
184; GFX9-NEXT:    s_mov_b32 s3, s5
185; GFX9-NEXT:    s_mov_b32 s4, s6
186; GFX9-NEXT:    s_mov_b32 s5, s7
187; GFX9-NEXT:    s_mov_b32 s6, s8
188; GFX9-NEXT:    s_mov_b32 s7, s9
189; GFX9-NEXT:    s_mov_b32 s8, s10
190; GFX9-NEXT:    s_mov_b32 s9, s11
191; GFX9-NEXT:    s_mov_b32 s10, s12
192; GFX9-NEXT:    s_mov_b32 s11, s13
193; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
194; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
195; GFX9-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
196; GFX9-NEXT:    s_waitcnt vmcnt(0)
197; GFX9-NEXT:    ; return to shader part epilog
198;
199; GFX10NSA-LABEL: gather4_c_2d:
200; GFX10NSA:       ; %bb.0: ; %main_body
201; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
202; GFX10NSA-NEXT:    s_mov_b32 s0, s2
203; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
204; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
205; GFX10NSA-NEXT:    s_mov_b32 s1, s3
206; GFX10NSA-NEXT:    s_mov_b32 s2, s4
207; GFX10NSA-NEXT:    s_mov_b32 s3, s5
208; GFX10NSA-NEXT:    s_mov_b32 s4, s6
209; GFX10NSA-NEXT:    s_mov_b32 s5, s7
210; GFX10NSA-NEXT:    s_mov_b32 s6, s8
211; GFX10NSA-NEXT:    s_mov_b32 s7, s9
212; GFX10NSA-NEXT:    s_mov_b32 s8, s10
213; GFX10NSA-NEXT:    s_mov_b32 s9, s11
214; GFX10NSA-NEXT:    s_mov_b32 s10, s12
215; GFX10NSA-NEXT:    s_mov_b32 s11, s13
216; GFX10NSA-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
217; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
218; GFX10NSA-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
219; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
220; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
221; GFX10NSA-NEXT:    ; return to shader part epilog
222main_body:
223  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
224  ret <4 x float> %v
225}
226
; Exercises llvm.amdgcn.image.gather4.cl.2d with three half (f16) address
; operands (%s, %t, %clamp). The checked output packs v0/v1 into one dword
; and keeps the low 16 bits of v2 in a second dword, so image_gather4_cl
; reads v[0:1] with the a16 modifier. exec is saved, widened with s_wqm, and
; and-ed with the saved copy right before the gather.
; NOTE(review): the upper 16 bits of the second address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
227define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
228; GFX9-LABEL: gather4_cl_2d:
229; GFX9:       ; %bb.0: ; %main_body
230; GFX9-NEXT:    s_mov_b64 s[14:15], exec
231; GFX9-NEXT:    s_mov_b32 s0, s2
232; GFX9-NEXT:    s_wqm_b64 exec, exec
233; GFX9-NEXT:    s_mov_b32 s2, s4
234; GFX9-NEXT:    s_mov_b32 s4, s6
235; GFX9-NEXT:    s_mov_b32 s6, s8
236; GFX9-NEXT:    s_mov_b32 s8, s10
237; GFX9-NEXT:    s_mov_b32 s10, s12
238; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
239; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
240; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
241; GFX9-NEXT:    s_mov_b32 s1, s3
242; GFX9-NEXT:    s_mov_b32 s3, s5
243; GFX9-NEXT:    s_mov_b32 s5, s7
244; GFX9-NEXT:    s_mov_b32 s7, s9
245; GFX9-NEXT:    s_mov_b32 s9, s11
246; GFX9-NEXT:    v_and_or_b32 v0, v0, v3, v1
247; GFX9-NEXT:    s_mov_b32 s11, s13
248; GFX9-NEXT:    v_and_or_b32 v1, v2, v3, s12
249; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
250; GFX9-NEXT:    image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
251; GFX9-NEXT:    s_waitcnt vmcnt(0)
252; GFX9-NEXT:    ; return to shader part epilog
253;
254; GFX10NSA-LABEL: gather4_cl_2d:
255; GFX10NSA:       ; %bb.0: ; %main_body
256; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
257; GFX10NSA-NEXT:    s_mov_b32 s0, s2
258; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
259; GFX10NSA-NEXT:    v_mov_b32_e32 v3, 0xffff
260; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
261; GFX10NSA-NEXT:    s_mov_b32 s2, s4
262; GFX10NSA-NEXT:    s_mov_b32 s4, s6
263; GFX10NSA-NEXT:    s_mov_b32 s6, s8
264; GFX10NSA-NEXT:    s_mov_b32 s8, s10
265; GFX10NSA-NEXT:    s_mov_b32 s10, s12
266; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
267; GFX10NSA-NEXT:    s_mov_b32 s1, s3
268; GFX10NSA-NEXT:    s_mov_b32 s3, s5
269; GFX10NSA-NEXT:    s_mov_b32 s5, s7
270; GFX10NSA-NEXT:    s_mov_b32 s7, s9
271; GFX10NSA-NEXT:    s_mov_b32 s9, s11
272; GFX10NSA-NEXT:    v_and_or_b32 v0, v0, v3, v1
273; GFX10NSA-NEXT:    s_mov_b32 s11, s13
274; GFX10NSA-NEXT:    v_and_or_b32 v1, v2, v3, s12
275; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
276; GFX10NSA-NEXT:    image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
277; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
278; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
279; GFX10NSA-NEXT:    ; return to shader part epilog
280main_body:
281  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
282  ret <4 x float> %v
283}
284
; Exercises llvm.amdgcn.image.gather4.c.cl.2d: a float %zcompare followed by
; three half (f16) operands (%s, %t, %clamp). In the checked output v0 is
; passed through untouched, v1/v2 are packed into v1, and the low 16 bits of
; v3 land in v2, so image_gather4_c_cl reads v[0:2] with the a16 modifier.
; exec is saved, widened with s_wqm, and and-ed with the saved copy right
; before the gather.
; NOTE(review): the upper 16 bits of the last address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
285define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
286; GFX9-LABEL: gather4_c_cl_2d:
287; GFX9:       ; %bb.0: ; %main_body
288; GFX9-NEXT:    s_mov_b64 s[14:15], exec
289; GFX9-NEXT:    s_mov_b32 s0, s2
290; GFX9-NEXT:    s_wqm_b64 exec, exec
291; GFX9-NEXT:    s_mov_b32 s2, s4
292; GFX9-NEXT:    s_mov_b32 s4, s6
293; GFX9-NEXT:    s_mov_b32 s6, s8
294; GFX9-NEXT:    s_mov_b32 s8, s10
295; GFX9-NEXT:    s_mov_b32 s10, s12
296; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
297; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
298; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
299; GFX9-NEXT:    s_mov_b32 s1, s3
300; GFX9-NEXT:    s_mov_b32 s3, s5
301; GFX9-NEXT:    s_mov_b32 s5, s7
302; GFX9-NEXT:    s_mov_b32 s7, s9
303; GFX9-NEXT:    s_mov_b32 s9, s11
304; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
305; GFX9-NEXT:    s_mov_b32 s11, s13
306; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s12
307; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
308; GFX9-NEXT:    image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
309; GFX9-NEXT:    s_waitcnt vmcnt(0)
310; GFX9-NEXT:    ; return to shader part epilog
311;
312; GFX10NSA-LABEL: gather4_c_cl_2d:
313; GFX10NSA:       ; %bb.0: ; %main_body
314; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
315; GFX10NSA-NEXT:    s_mov_b32 s0, s2
316; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
317; GFX10NSA-NEXT:    v_mov_b32_e32 v4, 0xffff
318; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
319; GFX10NSA-NEXT:    s_mov_b32 s2, s4
320; GFX10NSA-NEXT:    s_mov_b32 s4, s6
321; GFX10NSA-NEXT:    s_mov_b32 s6, s8
322; GFX10NSA-NEXT:    s_mov_b32 s8, s10
323; GFX10NSA-NEXT:    s_mov_b32 s10, s12
324; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
325; GFX10NSA-NEXT:    s_mov_b32 s1, s3
326; GFX10NSA-NEXT:    s_mov_b32 s3, s5
327; GFX10NSA-NEXT:    s_mov_b32 s5, s7
328; GFX10NSA-NEXT:    s_mov_b32 s7, s9
329; GFX10NSA-NEXT:    s_mov_b32 s9, s11
330; GFX10NSA-NEXT:    v_and_or_b32 v1, v1, v4, v2
331; GFX10NSA-NEXT:    s_mov_b32 s11, s13
332; GFX10NSA-NEXT:    v_and_or_b32 v2, v3, v4, s12
333; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
334; GFX10NSA-NEXT:    image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
335; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
336; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
337; GFX10NSA-NEXT:    ; return to shader part epilog
338main_body:
339  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
340  ret <4 x float> %v
341}
342
; Exercises llvm.amdgcn.image.gather4.b.2d: a float %bias followed by two
; half (f16) coordinates. The intrinsic name carries both an .f32 and an .f16
; type suffix. In the checked output v0 is passed through untouched, while v2
; is shifted left by 16 and merged with the low 16 bits of v1, so
; image_gather4_b reads v[0:1] with the a16 modifier. For gfx900 the 0xffff
; mask is first moved into a VGPR; for gfx1010 it is a literal operand of
; v_and_or_b32. exec is saved, widened with s_wqm, and and-ed with the saved
; copy right before the gather.
343define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
344; GFX9-LABEL: gather4_b_2d:
345; GFX9:       ; %bb.0: ; %main_body
346; GFX9-NEXT:    s_mov_b64 s[14:15], exec
347; GFX9-NEXT:    s_mov_b32 s0, s2
348; GFX9-NEXT:    s_wqm_b64 exec, exec
349; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
350; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
351; GFX9-NEXT:    s_mov_b32 s1, s3
352; GFX9-NEXT:    s_mov_b32 s2, s4
353; GFX9-NEXT:    s_mov_b32 s3, s5
354; GFX9-NEXT:    s_mov_b32 s4, s6
355; GFX9-NEXT:    s_mov_b32 s5, s7
356; GFX9-NEXT:    s_mov_b32 s6, s8
357; GFX9-NEXT:    s_mov_b32 s7, s9
358; GFX9-NEXT:    s_mov_b32 s8, s10
359; GFX9-NEXT:    s_mov_b32 s9, s11
360; GFX9-NEXT:    s_mov_b32 s10, s12
361; GFX9-NEXT:    s_mov_b32 s11, s13
362; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
363; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
364; GFX9-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
365; GFX9-NEXT:    s_waitcnt vmcnt(0)
366; GFX9-NEXT:    ; return to shader part epilog
367;
368; GFX10NSA-LABEL: gather4_b_2d:
369; GFX10NSA:       ; %bb.0: ; %main_body
370; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
371; GFX10NSA-NEXT:    s_mov_b32 s0, s2
372; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
373; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
374; GFX10NSA-NEXT:    s_mov_b32 s1, s3
375; GFX10NSA-NEXT:    s_mov_b32 s2, s4
376; GFX10NSA-NEXT:    s_mov_b32 s3, s5
377; GFX10NSA-NEXT:    s_mov_b32 s4, s6
378; GFX10NSA-NEXT:    s_mov_b32 s5, s7
379; GFX10NSA-NEXT:    s_mov_b32 s6, s8
380; GFX10NSA-NEXT:    s_mov_b32 s7, s9
381; GFX10NSA-NEXT:    s_mov_b32 s8, s10
382; GFX10NSA-NEXT:    s_mov_b32 s9, s11
383; GFX10NSA-NEXT:    s_mov_b32 s10, s12
384; GFX10NSA-NEXT:    s_mov_b32 s11, s13
385; GFX10NSA-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
386; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
387; GFX10NSA-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
388; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
389; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
390; GFX10NSA-NEXT:    ; return to shader part epilog
391main_body:
392  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
393  ret <4 x float> %v
394}
395
; Exercises llvm.amdgcn.image.gather4.c.b.2d: float %bias and float
; %zcompare followed by two half (f16) coordinates. In the checked output v0
; and v1 are passed through untouched, while v3 is shifted left by 16 and
; merged with the low 16 bits of v2, so image_gather4_c_b reads v[0:2] with
; the a16 modifier. For gfx900 the 0xffff mask is first moved into a VGPR;
; for gfx1010 it is a literal operand of v_and_or_b32. exec is saved, widened
; with s_wqm, and and-ed with the saved copy right before the gather.
396define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
397; GFX9-LABEL: gather4_c_b_2d:
398; GFX9:       ; %bb.0: ; %main_body
399; GFX9-NEXT:    s_mov_b64 s[14:15], exec
400; GFX9-NEXT:    s_mov_b32 s0, s2
401; GFX9-NEXT:    s_wqm_b64 exec, exec
402; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
403; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
404; GFX9-NEXT:    s_mov_b32 s1, s3
405; GFX9-NEXT:    s_mov_b32 s2, s4
406; GFX9-NEXT:    s_mov_b32 s3, s5
407; GFX9-NEXT:    s_mov_b32 s4, s6
408; GFX9-NEXT:    s_mov_b32 s5, s7
409; GFX9-NEXT:    s_mov_b32 s6, s8
410; GFX9-NEXT:    s_mov_b32 s7, s9
411; GFX9-NEXT:    s_mov_b32 s8, s10
412; GFX9-NEXT:    s_mov_b32 s9, s11
413; GFX9-NEXT:    s_mov_b32 s10, s12
414; GFX9-NEXT:    s_mov_b32 s11, s13
415; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
416; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
417; GFX9-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
418; GFX9-NEXT:    s_waitcnt vmcnt(0)
419; GFX9-NEXT:    ; return to shader part epilog
420;
421; GFX10NSA-LABEL: gather4_c_b_2d:
422; GFX10NSA:       ; %bb.0: ; %main_body
423; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
424; GFX10NSA-NEXT:    s_mov_b32 s0, s2
425; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
426; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
427; GFX10NSA-NEXT:    s_mov_b32 s1, s3
428; GFX10NSA-NEXT:    s_mov_b32 s2, s4
429; GFX10NSA-NEXT:    s_mov_b32 s3, s5
430; GFX10NSA-NEXT:    s_mov_b32 s4, s6
431; GFX10NSA-NEXT:    s_mov_b32 s5, s7
432; GFX10NSA-NEXT:    s_mov_b32 s6, s8
433; GFX10NSA-NEXT:    s_mov_b32 s7, s9
434; GFX10NSA-NEXT:    s_mov_b32 s8, s10
435; GFX10NSA-NEXT:    s_mov_b32 s9, s11
436; GFX10NSA-NEXT:    s_mov_b32 s10, s12
437; GFX10NSA-NEXT:    s_mov_b32 s11, s13
438; GFX10NSA-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
439; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
440; GFX10NSA-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
441; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
442; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
443; GFX10NSA-NEXT:    ; return to shader part epilog
444main_body:
445  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
446  ret <4 x float> %v
447}
448
; Exercises llvm.amdgcn.image.gather4.b.cl.2d: a float %bias followed by
; three half (f16) operands (%s, %t, %clamp). In the checked output v0 is
; passed through untouched, v1/v2 are packed into v1, and the low 16 bits of
; v3 land in v2, so image_gather4_b_cl reads v[0:2] with the a16 modifier.
; exec is saved, widened with s_wqm, and and-ed with the saved copy right
; before the gather.
; NOTE(review): the upper 16 bits of the last address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
449define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
450; GFX9-LABEL: gather4_b_cl_2d:
451; GFX9:       ; %bb.0: ; %main_body
452; GFX9-NEXT:    s_mov_b64 s[14:15], exec
453; GFX9-NEXT:    s_mov_b32 s0, s2
454; GFX9-NEXT:    s_wqm_b64 exec, exec
455; GFX9-NEXT:    s_mov_b32 s2, s4
456; GFX9-NEXT:    s_mov_b32 s4, s6
457; GFX9-NEXT:    s_mov_b32 s6, s8
458; GFX9-NEXT:    s_mov_b32 s8, s10
459; GFX9-NEXT:    s_mov_b32 s10, s12
460; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
461; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
462; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
463; GFX9-NEXT:    s_mov_b32 s1, s3
464; GFX9-NEXT:    s_mov_b32 s3, s5
465; GFX9-NEXT:    s_mov_b32 s5, s7
466; GFX9-NEXT:    s_mov_b32 s7, s9
467; GFX9-NEXT:    s_mov_b32 s9, s11
468; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
469; GFX9-NEXT:    s_mov_b32 s11, s13
470; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s12
471; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
472; GFX9-NEXT:    image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
473; GFX9-NEXT:    s_waitcnt vmcnt(0)
474; GFX9-NEXT:    ; return to shader part epilog
475;
476; GFX10NSA-LABEL: gather4_b_cl_2d:
477; GFX10NSA:       ; %bb.0: ; %main_body
478; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
479; GFX10NSA-NEXT:    s_mov_b32 s0, s2
480; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
481; GFX10NSA-NEXT:    v_mov_b32_e32 v4, 0xffff
482; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
483; GFX10NSA-NEXT:    s_mov_b32 s2, s4
484; GFX10NSA-NEXT:    s_mov_b32 s4, s6
485; GFX10NSA-NEXT:    s_mov_b32 s6, s8
486; GFX10NSA-NEXT:    s_mov_b32 s8, s10
487; GFX10NSA-NEXT:    s_mov_b32 s10, s12
488; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
489; GFX10NSA-NEXT:    s_mov_b32 s1, s3
490; GFX10NSA-NEXT:    s_mov_b32 s3, s5
491; GFX10NSA-NEXT:    s_mov_b32 s5, s7
492; GFX10NSA-NEXT:    s_mov_b32 s7, s9
493; GFX10NSA-NEXT:    s_mov_b32 s9, s11
494; GFX10NSA-NEXT:    v_and_or_b32 v1, v1, v4, v2
495; GFX10NSA-NEXT:    s_mov_b32 s11, s13
496; GFX10NSA-NEXT:    v_and_or_b32 v2, v3, v4, s12
497; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
498; GFX10NSA-NEXT:    image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
499; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
500; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
501; GFX10NSA-NEXT:    ; return to shader part epilog
502main_body:
503  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
504  ret <4 x float> %v
505}
506
; Exercises llvm.amdgcn.image.gather4.c.b.cl.2d: float %bias and float
; %zcompare followed by three half (f16) operands (%s, %t, %clamp). In the
; checked output v0 and v1 are passed through untouched, v2/v3 are packed
; into v2, and the low 16 bits of v4 land in v3, so image_gather4_c_b_cl
; reads v[0:3] with the a16 modifier. exec is saved, widened with s_wqm, and
; and-ed with the saved copy right before the gather.
; NOTE(review): the upper 16 bits of the last address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
507define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
508; GFX9-LABEL: gather4_c_b_cl_2d:
509; GFX9:       ; %bb.0: ; %main_body
510; GFX9-NEXT:    s_mov_b64 s[14:15], exec
511; GFX9-NEXT:    s_mov_b32 s0, s2
512; GFX9-NEXT:    s_wqm_b64 exec, exec
513; GFX9-NEXT:    s_mov_b32 s2, s4
514; GFX9-NEXT:    s_mov_b32 s4, s6
515; GFX9-NEXT:    s_mov_b32 s6, s8
516; GFX9-NEXT:    s_mov_b32 s8, s10
517; GFX9-NEXT:    s_mov_b32 s10, s12
518; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
519; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
520; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
521; GFX9-NEXT:    s_mov_b32 s1, s3
522; GFX9-NEXT:    s_mov_b32 s3, s5
523; GFX9-NEXT:    s_mov_b32 s5, s7
524; GFX9-NEXT:    s_mov_b32 s7, s9
525; GFX9-NEXT:    s_mov_b32 s9, s11
526; GFX9-NEXT:    v_and_or_b32 v2, v2, v5, v3
527; GFX9-NEXT:    s_mov_b32 s11, s13
528; GFX9-NEXT:    v_and_or_b32 v3, v4, v5, s12
529; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
530; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16
531; GFX9-NEXT:    s_waitcnt vmcnt(0)
532; GFX9-NEXT:    ; return to shader part epilog
533;
534; GFX10NSA-LABEL: gather4_c_b_cl_2d:
535; GFX10NSA:       ; %bb.0: ; %main_body
536; GFX10NSA-NEXT:    s_mov_b32 s28, exec_lo
537; GFX10NSA-NEXT:    s_mov_b32 s0, s2
538; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
539; GFX10NSA-NEXT:    v_mov_b32_e32 v5, 0xffff
540; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
541; GFX10NSA-NEXT:    s_mov_b32 s2, s4
542; GFX10NSA-NEXT:    s_mov_b32 s4, s6
543; GFX10NSA-NEXT:    s_mov_b32 s6, s8
544; GFX10NSA-NEXT:    s_mov_b32 s8, s10
545; GFX10NSA-NEXT:    s_mov_b32 s10, s12
546; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
547; GFX10NSA-NEXT:    s_mov_b32 s1, s3
548; GFX10NSA-NEXT:    s_mov_b32 s3, s5
549; GFX10NSA-NEXT:    s_mov_b32 s5, s7
550; GFX10NSA-NEXT:    s_mov_b32 s7, s9
551; GFX10NSA-NEXT:    s_mov_b32 s9, s11
552; GFX10NSA-NEXT:    v_and_or_b32 v2, v2, v5, v3
553; GFX10NSA-NEXT:    s_mov_b32 s11, s13
554; GFX10NSA-NEXT:    v_and_or_b32 v3, v4, v5, s12
555; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s28
556; GFX10NSA-NEXT:    image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
557; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
558; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
559; GFX10NSA-NEXT:    ; return to shader part epilog
560main_body:
561  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
562  ret <4 x float> %v
563}
564
; Exercises llvm.amdgcn.image.gather4.l.2d with three half (f16) address
; operands (%s, %t, %lod). The checked output packs v0/v1 into one dword and
; keeps the low 16 bits of v2 in a second dword, so image_gather4_l reads
; v[0:1] with the a16 modifier. No exec save / s_wqm / exec restore sequence
; appears in the expected output for this variant.
; NOTE(review): the upper 16 bits of the second address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
565define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
566; GFX9-LABEL: gather4_l_2d:
567; GFX9:       ; %bb.0: ; %main_body
568; GFX9-NEXT:    s_mov_b32 s0, s2
569; GFX9-NEXT:    s_mov_b32 s2, s4
570; GFX9-NEXT:    s_mov_b32 s4, s6
571; GFX9-NEXT:    s_mov_b32 s6, s8
572; GFX9-NEXT:    s_mov_b32 s8, s10
573; GFX9-NEXT:    s_mov_b32 s10, s12
574; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
575; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
576; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
577; GFX9-NEXT:    s_mov_b32 s1, s3
578; GFX9-NEXT:    s_mov_b32 s3, s5
579; GFX9-NEXT:    s_mov_b32 s5, s7
580; GFX9-NEXT:    s_mov_b32 s7, s9
581; GFX9-NEXT:    s_mov_b32 s9, s11
582; GFX9-NEXT:    v_and_or_b32 v0, v0, v3, v1
583; GFX9-NEXT:    s_mov_b32 s11, s13
584; GFX9-NEXT:    v_and_or_b32 v1, v2, v3, s12
585; GFX9-NEXT:    image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
586; GFX9-NEXT:    s_waitcnt vmcnt(0)
587; GFX9-NEXT:    ; return to shader part epilog
588;
589; GFX10NSA-LABEL: gather4_l_2d:
590; GFX10NSA:       ; %bb.0: ; %main_body
591; GFX10NSA-NEXT:    v_mov_b32_e32 v3, 0xffff
592; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
593; GFX10NSA-NEXT:    s_mov_b32 s0, s2
594; GFX10NSA-NEXT:    s_mov_b32 s2, s4
595; GFX10NSA-NEXT:    s_mov_b32 s4, s6
596; GFX10NSA-NEXT:    s_mov_b32 s6, s8
597; GFX10NSA-NEXT:    s_mov_b32 s8, s10
598; GFX10NSA-NEXT:    s_mov_b32 s10, s12
599; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
600; GFX10NSA-NEXT:    v_and_or_b32 v0, v0, v3, v1
601; GFX10NSA-NEXT:    v_and_or_b32 v1, v2, v3, s12
602; GFX10NSA-NEXT:    s_mov_b32 s1, s3
603; GFX10NSA-NEXT:    s_mov_b32 s3, s5
604; GFX10NSA-NEXT:    s_mov_b32 s5, s7
605; GFX10NSA-NEXT:    s_mov_b32 s7, s9
606; GFX10NSA-NEXT:    s_mov_b32 s9, s11
607; GFX10NSA-NEXT:    s_mov_b32 s11, s13
608; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
609; GFX10NSA-NEXT:    image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
610; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
611; GFX10NSA-NEXT:    ; return to shader part epilog
612main_body:
613  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
614  ret <4 x float> %v
615}
616
; Exercises llvm.amdgcn.image.gather4.c.l.2d: a float %zcompare followed by
; three half (f16) operands (%s, %t, %lod). In the checked output v0 is
; passed through untouched, v1/v2 are packed into v1, and the low 16 bits of
; v3 land in v2, so image_gather4_c_l reads v[0:2] with the a16 modifier. No
; exec save / s_wqm / exec restore sequence appears in the expected output
; for this variant.
; NOTE(review): the upper 16 bits of the last address dword come from
; "s_lshl_b32 s12, s0, 16", where s0 already holds a copy of s2. That looks
; like an arbitrary pad for the unused half rather than a meaningful value --
; confirm.
617define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
618; GFX9-LABEL: gather4_c_l_2d:
619; GFX9:       ; %bb.0: ; %main_body
620; GFX9-NEXT:    s_mov_b32 s0, s2
621; GFX9-NEXT:    s_mov_b32 s2, s4
622; GFX9-NEXT:    s_mov_b32 s4, s6
623; GFX9-NEXT:    s_mov_b32 s6, s8
624; GFX9-NEXT:    s_mov_b32 s8, s10
625; GFX9-NEXT:    s_mov_b32 s10, s12
626; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
627; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
628; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
629; GFX9-NEXT:    s_mov_b32 s1, s3
630; GFX9-NEXT:    s_mov_b32 s3, s5
631; GFX9-NEXT:    s_mov_b32 s5, s7
632; GFX9-NEXT:    s_mov_b32 s7, s9
633; GFX9-NEXT:    s_mov_b32 s9, s11
634; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
635; GFX9-NEXT:    s_mov_b32 s11, s13
636; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s12
637; GFX9-NEXT:    image_gather4_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
638; GFX9-NEXT:    s_waitcnt vmcnt(0)
639; GFX9-NEXT:    ; return to shader part epilog
640;
641; GFX10NSA-LABEL: gather4_c_l_2d:
642; GFX10NSA:       ; %bb.0: ; %main_body
643; GFX10NSA-NEXT:    v_mov_b32_e32 v4, 0xffff
644; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
645; GFX10NSA-NEXT:    s_mov_b32 s0, s2
646; GFX10NSA-NEXT:    s_mov_b32 s2, s4
647; GFX10NSA-NEXT:    s_mov_b32 s4, s6
648; GFX10NSA-NEXT:    s_mov_b32 s6, s8
649; GFX10NSA-NEXT:    s_mov_b32 s8, s10
650; GFX10NSA-NEXT:    s_mov_b32 s10, s12
651; GFX10NSA-NEXT:    s_lshl_b32 s12, s0, 16
652; GFX10NSA-NEXT:    v_and_or_b32 v1, v1, v4, v2
653; GFX10NSA-NEXT:    v_and_or_b32 v2, v3, v4, s12
654; GFX10NSA-NEXT:    s_mov_b32 s1, s3
655; GFX10NSA-NEXT:    s_mov_b32 s3, s5
656; GFX10NSA-NEXT:    s_mov_b32 s5, s7
657; GFX10NSA-NEXT:    s_mov_b32 s7, s9
658; GFX10NSA-NEXT:    s_mov_b32 s9, s11
659; GFX10NSA-NEXT:    s_mov_b32 s11, s13
660; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
661; GFX10NSA-NEXT:    image_gather4_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
662; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
663; GFX10NSA-NEXT:    ; return to shader part epilog
664main_body:
665  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
666  ret <4 x float> %v
667}
668
; Exercises llvm.amdgcn.image.gather4.lz.2d with two half (f16) coordinates.
; In the checked output v1 is shifted left by 16 and merged with the low 16
; bits of v0, so a single VGPR feeds image_gather4_lz with the a16 modifier.
; For gfx900 the 0xffff mask is first moved into a VGPR; for gfx1010 it is a
; literal operand of v_and_or_b32. No exec save / s_wqm / exec restore
; sequence appears in the expected output for this variant.
669define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
670; GFX9-LABEL: gather4_lz_2d:
671; GFX9:       ; %bb.0: ; %main_body
672; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
673; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
674; GFX9-NEXT:    s_mov_b32 s0, s2
675; GFX9-NEXT:    s_mov_b32 s1, s3
676; GFX9-NEXT:    s_mov_b32 s2, s4
677; GFX9-NEXT:    s_mov_b32 s3, s5
678; GFX9-NEXT:    s_mov_b32 s4, s6
679; GFX9-NEXT:    s_mov_b32 s5, s7
680; GFX9-NEXT:    s_mov_b32 s6, s8
681; GFX9-NEXT:    s_mov_b32 s7, s9
682; GFX9-NEXT:    s_mov_b32 s8, s10
683; GFX9-NEXT:    s_mov_b32 s9, s11
684; GFX9-NEXT:    s_mov_b32 s10, s12
685; GFX9-NEXT:    s_mov_b32 s11, s13
686; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
687; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
688; GFX9-NEXT:    s_waitcnt vmcnt(0)
689; GFX9-NEXT:    ; return to shader part epilog
690;
691; GFX10NSA-LABEL: gather4_lz_2d:
692; GFX10NSA:       ; %bb.0: ; %main_body
693; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
694; GFX10NSA-NEXT:    s_mov_b32 s0, s2
695; GFX10NSA-NEXT:    s_mov_b32 s1, s3
696; GFX10NSA-NEXT:    s_mov_b32 s2, s4
697; GFX10NSA-NEXT:    s_mov_b32 s3, s5
698; GFX10NSA-NEXT:    v_and_or_b32 v0, v0, 0xffff, v1
699; GFX10NSA-NEXT:    s_mov_b32 s4, s6
700; GFX10NSA-NEXT:    s_mov_b32 s5, s7
701; GFX10NSA-NEXT:    s_mov_b32 s6, s8
702; GFX10NSA-NEXT:    s_mov_b32 s7, s9
703; GFX10NSA-NEXT:    s_mov_b32 s8, s10
704; GFX10NSA-NEXT:    s_mov_b32 s9, s11
705; GFX10NSA-NEXT:    s_mov_b32 s10, s12
706; GFX10NSA-NEXT:    s_mov_b32 s11, s13
707; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
708; GFX10NSA-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
709; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
710; GFX10NSA-NEXT:    ; return to shader part epilog
711main_body:
712  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
713  ret <4 x float> %v
714}
715
; A16 gather4.c.lz on a 2D image: %zcompare stays a full 32-bit float while
; the coordinates (%s, %t) are half. The expected output for both RUN
; configurations (gfx900 and gfx1010) leaves %zcompare in v0 and packs the
; two half coordinates into v1 (shift %t left by 16, then and-or with the
; 0xffff-masked %s), so the address operand of image_gather4_c_lz is v[0:1]
; with the a16 modifier. The inreg descriptor arguments are copied down so
; that the resource occupies s[0:7] and the sampler s[8:11].
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_lz_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_lz_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    ; implicit-def: $vcc_hi
; GFX10NSA-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Declarations of the gather4 image intrinsics exercised by this test, all in
; their half-coordinate form (".f16" suffix) returning <4 x float> (".v4f32").
; Each takes a leading i32 immarg, then the per-variant address operands,
; then the <8 x i32> resource, the <4 x i32> sampler, and three trailing
; immargs (i1, i32, i32).
; The ".b" variants carry one extra leading float operand that is reflected
; in their mangled names as ".f32.f16" (presumably the bias, going by the
; intrinsic name - confirm against IntrinsicsAMDGPU.td). The ".c" variants
; take an additional float operand that is not part of the mangled name.
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 immarg, float, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 immarg, float, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

; Attribute group shared by every declaration above.
attributes #0 = { nounwind readonly }
