; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GCN %s

; sample_l_1d: amdgpu_ps function that calls llvm.amdgcn.image.sample.l.1d
; with the leading i32 operand set to 15 (printed as dmask:0xf) and a constant
; LOD operand of 0.0.
; The assertions below expect a single image_sample_lz whose address is the
; lone VGPR v0, i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; GCN-LABEL: sample_l_1d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_l_2d: amdgpu_ps function that calls llvm.amdgcn.image.sample.l.2d
; with coordinates (%s, %t) and a constant LOD operand of -0.0.
; The assertions below expect a single image_sample_lz with a two-VGPR
; address v[0:1], i.e. the LOD operand does not reach the instruction.
; NOTE(review): presumably -0.0 is treated the same as +0.0 by the fold that
; produces the _lz form - confirm against the backend.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GCN-LABEL: sample_l_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_l_1d: amdgpu_ps function that calls llvm.amdgcn.image.sample.c.l.1d
; with operands (%zcompare, %s) and a constant LOD operand of -2.0.
; The assertions below expect a single image_sample_c_lz with a two-VGPR
; address v[0:1], i.e. the LOD operand does not reach the instruction.
; NOTE(review): presumably any constant LOD <= 0 is folded to the _lz form,
; which is why a negative, non-zero value is used here - confirm against the
; backend.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
; GCN-LABEL: sample_c_l_1d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_l_2d: amdgpu_ps function that calls llvm.amdgcn.image.sample.c.l.2d
; with operands (%zcompare, %s, %t) and a constant LOD operand of 0.0.
; The assertions below expect a single image_sample_c_lz with a three-VGPR
; address v[0:2], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; GCN-LABEL: sample_c_l_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_l_o_1d: amdgpu_ps function that calls llvm.amdgcn.image.sample.l.o.1d
; with operands (i32 %offset, %s) and a constant LOD operand of 0.0.
; The assertions below expect a single image_sample_lz_o with a two-VGPR
; address v[0:1], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
; GCN-LABEL: sample_l_o_1d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_l_o_2d: amdgpu_ps function that calls llvm.amdgcn.image.sample.l.o.2d
; with operands (i32 %offset, %s, %t) and a constant LOD operand of 0.0.
; The assertions below expect a single image_sample_lz_o with a three-VGPR
; address v[0:2], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
; GCN-LABEL: sample_l_o_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_l_o_1d: amdgpu_ps function that calls
; llvm.amdgcn.image.sample.c.l.o.1d with operands (i32 %offset, %zcompare, %s)
; and a constant LOD operand of 0.0.
; The assertions below expect a single image_sample_c_lz_o with a three-VGPR
; address v[0:2], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
; GCN-LABEL: sample_c_l_o_1d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_c_l_o_2d: amdgpu_ps function that calls
; llvm.amdgcn.image.sample.c.l.o.2d with operands
; (i32 %offset, %zcompare, %s, %t) and a constant LOD operand of 0.0.
; The assertions below expect a single image_sample_c_lz_o with a four-VGPR
; address v[0:3], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
; GCN-LABEL: sample_c_l_o_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4_l_2d: amdgpu_ps function that calls llvm.amdgcn.image.gather4.l.2d
; with coordinates (%s, %t) and a constant LOD operand of 0.0.
; The assertions below expect a single image_gather4_lz with a two-VGPR
; address v[0:1], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GCN-LABEL: gather4_l_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4_c_l_2d: amdgpu_ps function that calls
; llvm.amdgcn.image.gather4.c.l.2d with operands (%zcompare, %s, %t) and a
; constant LOD operand of 0.0.
; The assertions below expect a single image_gather4_c_lz with a three-VGPR
; address v[0:2], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; GCN-LABEL: gather4_c_l_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4_l_o_2d: amdgpu_ps function that calls
; llvm.amdgcn.image.gather4.l.o.2d with operands (i32 %offset, %s, %t) and a
; constant LOD operand of 0.0.
; The assertions below expect a single image_gather4_lz_o with a three-VGPR
; address v[0:2], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
; GCN-LABEL: gather4_l_o_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4_c_l_o_2d: amdgpu_ps function that calls
; llvm.amdgcn.image.gather4.c.l.o.2d with operands
; (i32 %offset, %zcompare, %s, %t) and a constant LOD operand of 0.0.
; The assertions below expect a single image_gather4_c_lz_o with a four-VGPR
; address v[0:3], i.e. the LOD operand does not reach the instruction.
; The s_mov_b32 run copies s2..s13 down to s0..s11, which the image
; instruction then reads as s[0:7] and s[8:11].
; The %lod argument is declared but never referenced by the body.
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
; GCN-LABEL: gather4_c_l_o_2d:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    s_mov_b32 s4, s6
; GCN-NEXT:    s_mov_b32 s5, s7
; GCN-NEXT:    s_mov_b32 s6, s8
; GCN-NEXT:    s_mov_b32 s7, s9
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s11
; GCN-NEXT:    s_mov_b32 s10, s12
; GCN-NEXT:    s_mov_b32 s11, s13
; GCN-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Declarations of the image intrinsics called by the test functions in this
; file, one per sample/gather4 variant (.l, .c.l, .l.o, .c.l.o; 1d and 2d).
; In every declaration the leading i32 and the trailing i1/i32/i32 operands
; are immarg, so call sites must pass constants for them; the float operands,
; the i32 offset of the .o variants, and the <8 x i32> / <4 x i32> operands
; are ordinary values.
; All declarations share attribute group #0 (nounwind readonly), defined last.
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }