1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3
; sample.d, 1D: two half derivatives (%dsdh, %dsdv) followed by one float
; coordinate (%s), dmask 15, returning <4 x float>.
; The checks expect a single image_sample_d_g16 with dim:SQ_RSRC_IMG_1D whose
; address is the contiguous range v[0:2]; v0 and v1 are first rewritten by
; v_and_or_b32 with the 0xffff mask held in v3.
4define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
5; GFX10-LABEL: sample_d_1d:
6; GFX10:       ; %bb.0: ; %main_body
7; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
8; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
9; GFX10-NEXT:    ; implicit-def: $vcc_hi
10; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
11; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
12; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
13; GFX10-NEXT:    s_waitcnt vmcnt(0)
14; GFX10-NEXT:    ; return to shader part epilog
15main_body:
16  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
17  ret <4 x float> %v
18}
19
; sample.d, 2D: four half derivatives (%dsdh, %dtdh, %dsdv, %dtdv) followed by
; two float coordinates (%s, %t), dmask 15, returning <4 x float>.
; The checks expect the derivatives to be combined pairwise (shift left by 16,
; then v_and_or_b32 with the 0xffff mask) into v10 and v3, and a single
; image_sample_d_g16 taking the four-register address list [v10, v3, v4, v5].
20define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
21; GFX10-LABEL: sample_d_2d:
22; GFX10:       ; %bb.0: ; %main_body
23; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
24; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
25; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
26; GFX10-NEXT:    ; implicit-def: $vcc_hi
27; GFX10-NEXT:    v_and_or_b32 v3, v2, v6, v3
28; GFX10-NEXT:    v_and_or_b32 v10, v0, v6, v1
29; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
30; GFX10-NEXT:    s_waitcnt vmcnt(0)
31; GFX10-NEXT:    ; return to shader part epilog
32main_body:
33  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
34  ret <4 x float> %v
35}
36
; sample.d, 3D: six half derivatives (%dsdh, %dtdh, %drdh, %dsdv, %dtdv, %drdv)
; followed by three float coordinates (%s, %t, %r), dmask 15.
; The checks expect four v_and_or_b32 results in v0..v3 (two of them OR in a
; shifted VGPR, the other two OR in s12) and a single image_sample_d_g16 with
; a seven-register address list and dim:SQ_RSRC_IMG_3D.
37define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
38; GFX10-LABEL: sample_d_3d:
39; GFX10:       ; %bb.0: ; %main_body
40; GFX10-NEXT:    v_mov_b32_e32 v11, 0xffff
41; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
42; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
43; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
44; GFX10-NEXT:    ; implicit-def: $vcc_hi
45; GFX10-NEXT:    v_and_or_b32 v0, v0, v11, v1
46; GFX10-NEXT:    v_and_or_b32 v1, v2, v11, s12
47; GFX10-NEXT:    v_and_or_b32 v2, v3, v11, v4
48; GFX10-NEXT:    v_and_or_b32 v3, v5, v11, s12
49; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
50; GFX10-NEXT:    s_waitcnt vmcnt(0)
51; GFX10-NEXT:    ; return to shader part epilog
52main_body:
53  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
54  ret <4 x float> %v
55}
56
; sample.c.d, 1D: float %zcompare, then two half derivatives (%dsdh, %dsdv),
; then one float coordinate (%s), dmask 15.
; The checks expect v1 and v2 to be rewritten by v_and_or_b32 with the 0xffff
; mask and a single image_sample_c_d_g16 addressing the range v[0:3].
57define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
58; GFX10-LABEL: sample_c_d_1d:
59; GFX10:       ; %bb.0: ; %main_body
60; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
61; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
62; GFX10-NEXT:    ; implicit-def: $vcc_hi
63; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
64; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
65; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
66; GFX10-NEXT:    s_waitcnt vmcnt(0)
67; GFX10-NEXT:    ; return to shader part epilog
68main_body:
69  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
70  ret <4 x float> %v
71}
72
; sample.c.d, 2D: float %zcompare, four half derivatives, then two float
; coordinates (%s, %t), dmask 15.
; The checks expect the derivatives combined pairwise into v1 and v3
; (v_lshlrev_b32 by 16 + v_and_or_b32 with 0xffff) and a single
; image_sample_c_d_g16 with the five-register list [v0, v1, v3, v5, v6].
73define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
74; GFX10-LABEL: sample_c_d_2d:
75; GFX10:       ; %bb.0: ; %main_body
76; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
77; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
78; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
79; GFX10-NEXT:    ; implicit-def: $vcc_hi
80; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
81; GFX10-NEXT:    v_and_or_b32 v3, v3, v7, v4
82; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
83; GFX10-NEXT:    s_waitcnt vmcnt(0)
84; GFX10-NEXT:    ; return to shader part epilog
85main_body:
86  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
87  ret <4 x float> %v
88}
89
; sample.d.cl, 1D: two half derivatives (%dsdh, %dsdv), one float coordinate
; (%s) and a trailing float %clamp, dmask 15.
; The checks expect v0 and v1 to be rewritten by v_and_or_b32 with the 0xffff
; mask and a single image_sample_d_cl_g16 addressing the range v[0:3].
90define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
91; GFX10-LABEL: sample_d_cl_1d:
92; GFX10:       ; %bb.0: ; %main_body
93; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
94; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
95; GFX10-NEXT:    ; implicit-def: $vcc_hi
96; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, s12
97; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
98; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
99; GFX10-NEXT:    s_waitcnt vmcnt(0)
100; GFX10-NEXT:    ; return to shader part epilog
101main_body:
102  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
103  ret <4 x float> %v
104}
105
; sample.d.cl, 2D: four half derivatives, two float coordinates (%s, %t) and a
; trailing float %clamp, dmask 15.
; The checks expect the derivatives combined pairwise into v0 and v3
; (v_lshlrev_b32 by 16 + v_and_or_b32 with 0xffff) and a single
; image_sample_d_cl_g16 with the five-register list [v0, v3, v4, v5, v6].
106define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
107; GFX10-LABEL: sample_d_cl_2d:
108; GFX10:       ; %bb.0: ; %main_body
109; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
110; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v3
111; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
112; GFX10-NEXT:    ; implicit-def: $vcc_hi
113; GFX10-NEXT:    v_and_or_b32 v3, v2, v7, v9
114; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
115; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
116; GFX10-NEXT:    s_waitcnt vmcnt(0)
117; GFX10-NEXT:    ; return to shader part epilog
118main_body:
119  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
120  ret <4 x float> %v
121}
122
; sample.c.d.cl, 1D: float %zcompare, two half derivatives (%dsdh, %dsdv), one
; float coordinate (%s) and a trailing float %clamp, dmask 15.
; The checks expect v1 and v2 to be rewritten by v_and_or_b32 with the 0xffff
; mask and a single image_sample_c_d_cl_g16 whose address operand is the
; eight-register range v[0:7].
123define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
124; GFX10-LABEL: sample_c_d_cl_1d:
125; GFX10:       ; %bb.0: ; %main_body
126; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
127; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
128; GFX10-NEXT:    ; implicit-def: $vcc_hi
129; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
130; GFX10-NEXT:    v_and_or_b32 v2, v2, v7, s12
131; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
132; GFX10-NEXT:    s_waitcnt vmcnt(0)
133; GFX10-NEXT:    ; return to shader part epilog
134main_body:
135  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
136  ret <4 x float> %v
137}
138
; sample.c.d.cl, 2D: float %zcompare, four half derivatives, two float
; coordinates (%s, %t) and a trailing float %clamp, dmask 15.
; The checks expect the derivatives combined pairwise into v1 and v2
; (v_lshlrev_b32 by 16 + v_and_or_b32 with 0xffff) and a single
; image_sample_c_d_cl_g16 with the six-register list [v0, v1, v2, v5, v6, v7].
139define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
140; GFX10-LABEL: sample_c_d_cl_2d:
141; GFX10:       ; %bb.0: ; %main_body
142; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
143; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
144; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 16, v4
145; GFX10-NEXT:    ; implicit-def: $vcc_hi
146; GFX10-NEXT:    v_and_or_b32 v1, v1, v8, v2
147; GFX10-NEXT:    v_and_or_b32 v2, v3, v8, v10
148; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
149; GFX10-NEXT:    s_waitcnt vmcnt(0)
150; GFX10-NEXT:    ; return to shader part epilog
151main_body:
152  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
153  ret <4 x float> %v
154}
155
; sample.cd, 1D: two half derivatives (%dsdh, %dsdv) followed by one float
; coordinate (%s), dmask 15.
; Same operand layout as the sample.d 1D intrinsic; the checks expect the
; image_sample_cd_g16 mnemonic with the address range v[0:2].
156define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
157; GFX10-LABEL: sample_cd_1d:
158; GFX10:       ; %bb.0: ; %main_body
159; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
160; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
161; GFX10-NEXT:    ; implicit-def: $vcc_hi
162; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
163; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
164; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
165; GFX10-NEXT:    s_waitcnt vmcnt(0)
166; GFX10-NEXT:    ; return to shader part epilog
167main_body:
168  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
169  ret <4 x float> %v
170}
171
; sample.cd, 2D: four half derivatives followed by two float coordinates
; (%s, %t), dmask 15.
; The checks expect the derivatives combined pairwise into v10 and v3
; (v_lshlrev_b32 by 16 + v_and_or_b32 with 0xffff) and a single
; image_sample_cd_g16 with the four-register list [v10, v3, v4, v5].
172define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
173; GFX10-LABEL: sample_cd_2d:
174; GFX10:       ; %bb.0: ; %main_body
175; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
176; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
177; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
178; GFX10-NEXT:    ; implicit-def: $vcc_hi
179; GFX10-NEXT:    v_and_or_b32 v3, v2, v6, v3
180; GFX10-NEXT:    v_and_or_b32 v10, v0, v6, v1
181; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
182; GFX10-NEXT:    s_waitcnt vmcnt(0)
183; GFX10-NEXT:    ; return to shader part epilog
184main_body:
185  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
186  ret <4 x float> %v
187}
188
; sample.c.cd, 1D: float %zcompare, two half derivatives (%dsdh, %dsdv), then
; one float coordinate (%s), dmask 15.
; The checks expect v1 and v2 to be rewritten by v_and_or_b32 with the 0xffff
; mask and a single image_sample_c_cd_g16 addressing the range v[0:3].
189define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
190; GFX10-LABEL: sample_c_cd_1d:
191; GFX10:       ; %bb.0: ; %main_body
192; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
193; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
194; GFX10-NEXT:    ; implicit-def: $vcc_hi
195; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
196; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
197; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
198; GFX10-NEXT:    s_waitcnt vmcnt(0)
199; GFX10-NEXT:    ; return to shader part epilog
200main_body:
201  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
202  ret <4 x float> %v
203}
204
; sample.c.cd, 2D: float %zcompare, four half derivatives, then two float
; coordinates (%s, %t), dmask 15.
; The checks expect the derivatives combined pairwise into v1 and v3
; (v_lshlrev_b32 by 16 + v_and_or_b32 with 0xffff) and a single
; image_sample_c_cd_g16 with the five-register list [v0, v1, v3, v5, v6].
205define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
206; GFX10-LABEL: sample_c_cd_2d:
207; GFX10:       ; %bb.0: ; %main_body
208; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
209; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
210; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
211; GFX10-NEXT:    ; implicit-def: $vcc_hi
212; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
213; GFX10-NEXT:    v_and_or_b32 v3, v3, v7, v4
214; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
215; GFX10-NEXT:    s_waitcnt vmcnt(0)
216; GFX10-NEXT:    ; return to shader part epilog
217main_body:
218  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
219  ret <4 x float> %v
220}
221
; sample.cd.cl, 1D: two half derivatives (%dsdh, %dsdv), one float coordinate
; (%s) and a trailing float %clamp, dmask 15.
; The checks expect v0 and v1 to be rewritten by v_and_or_b32 with the 0xffff
; mask and a single image_sample_cd_cl_g16 addressing the range v[0:3].
222define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
223; GFX10-LABEL: sample_cd_cl_1d:
224; GFX10:       ; %bb.0: ; %main_body
225; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
226; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
227; GFX10-NEXT:    ; implicit-def: $vcc_hi
228; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, s12
229; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
230; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
231; GFX10-NEXT:    s_waitcnt vmcnt(0)
232; GFX10-NEXT:    ; return to shader part epilog
233main_body:
234  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
235  ret <4 x float> %v
236}
237
; sample.cd.cl, 2D: four half derivatives, two float coordinates (%s, %t) and
; a trailing float %clamp, dmask 15.
; The checks expect the derivatives combined pairwise into v0 and v3
; (v_lshlrev_b32 by 16 + v_and_or_b32 with 0xffff) and a single
; image_sample_cd_cl_g16 with the five-register list [v0, v3, v4, v5, v6].
238define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
239; GFX10-LABEL: sample_cd_cl_2d:
240; GFX10:       ; %bb.0: ; %main_body
241; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
242; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v3
243; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
244; GFX10-NEXT:    ; implicit-def: $vcc_hi
245; GFX10-NEXT:    v_and_or_b32 v3, v2, v7, v9
246; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
247; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
248; GFX10-NEXT:    s_waitcnt vmcnt(0)
249; GFX10-NEXT:    ; return to shader part epilog
250main_body:
251  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
252  ret <4 x float> %v
253}
254
; sample.c.cd.cl, 1D: float %zcompare, two half derivatives (%dsdh, %dsdv),
; one float coordinate (%s) and a trailing float %clamp, dmask 15.
; The checks expect v1 and v2 to be rewritten by v_and_or_b32 with the 0xffff
; mask and a single image_sample_c_cd_cl_g16 whose address operand is the
; eight-register range v[0:7].
255define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
256; GFX10-LABEL: sample_c_cd_cl_1d:
257; GFX10:       ; %bb.0: ; %main_body
258; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
259; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
260; GFX10-NEXT:    ; implicit-def: $vcc_hi
261; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
262; GFX10-NEXT:    v_and_or_b32 v2, v2, v7, s12
263; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
264; GFX10-NEXT:    s_waitcnt vmcnt(0)
265; GFX10-NEXT:    ; return to shader part epilog
266main_body:
267  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
268  ret <4 x float> %v
269}
270
; sample.c.cd.cl, 2D: float %zcompare, four half derivatives, two float
; coordinates (%s, %t) and a trailing float %clamp, dmask 15.
; The checks expect the derivatives combined pairwise into v1 and v2
; (v_lshlrev_b32 by 16 + v_and_or_b32 with 0xffff) and a single
; image_sample_c_cd_cl_g16 with the six-register list [v0, v1, v2, v5, v6, v7].
271define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
272; GFX10-LABEL: sample_c_cd_cl_2d:
273; GFX10:       ; %bb.0: ; %main_body
274; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
275; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
276; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 16, v4
277; GFX10-NEXT:    ; implicit-def: $vcc_hi
278; GFX10-NEXT:    v_and_or_b32 v1, v1, v8, v2
279; GFX10-NEXT:    v_and_or_b32 v2, v3, v8, v10
280; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
281; GFX10-NEXT:    s_waitcnt vmcnt(0)
282; GFX10-NEXT:    ; return to shader part epilog
283main_body:
284  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
285  ret <4 x float> %v
286}
287
; sample.c.d.o, 2D array, scalar result: i32 %offset, float %zcompare, four
; half derivatives, then float %s, %t and %slice; dmask 4, returning float.
; The checks expect the derivatives combined pairwise into v2 and v3, a single
; image_sample_c_d_o_g16 writing only v0 with dmask:0x4, a seven-register
; address list and dim:SQ_RSRC_IMG_2D_ARRAY.
288define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
289; GFX10-LABEL: sample_c_d_o_2darray_V1:
290; GFX10:       ; %bb.0: ; %main_body
291; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
292; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
293; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
294; GFX10-NEXT:    ; implicit-def: $vcc_hi
295; GFX10-NEXT:    v_and_or_b32 v2, v2, v9, v3
296; GFX10-NEXT:    v_and_or_b32 v3, v4, v9, v11
297; GFX10-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
298; GFX10-NEXT:    s_waitcnt vmcnt(0)
299; GFX10-NEXT:    ; return to shader part epilog
300main_body:
301  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
302  ret float %v
303}
304
; sample.c.d.o, 2D array, two-component result: same operands as the scalar
; variant of this intrinsic but with dmask 6 and a <2 x float> return.
; The checks expect the derivatives combined pairwise into v2 and v3, a single
; image_sample_c_d_o_g16 writing v[0:1] with dmask:0x6, a seven-register
; address list and dim:SQ_RSRC_IMG_2D_ARRAY.
305define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
306; GFX10-LABEL: sample_c_d_o_2darray_V2:
307; GFX10:       ; %bb.0: ; %main_body
308; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
309; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
310; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
311; GFX10-NEXT:    ; implicit-def: $vcc_hi
312; GFX10-NEXT:    v_and_or_b32 v2, v2, v9, v3
313; GFX10-NEXT:    v_and_or_b32 v3, v4, v9, v11
314; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
315; GFX10-NEXT:    s_waitcnt vmcnt(0)
316; GFX10-NEXT:    ; return to shader part epilog
317main_body:
318  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
319  ret <2 x float> %v
320}
321
; Declarations for the sample.d intrinsic variants (plain, .c, .cl, .c.cl) used
; above. In each signature the leading i32 receives the dmask constant at the
; call sites, the half operands are the derivatives, and the trailing
; <8 x i32>, <4 x i32>, i1, i32, i32 operands are passed %rsrc, %samp, 0, 0, 0.
322declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
323declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
324declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
325declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
326declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
327declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
328declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
329declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
330declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
331
; Declarations for the sample.cd intrinsic variants (plain, .c, .cl, .c.cl)
; used above; the operand lists mirror the corresponding sample.d signatures.
332declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
333declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
334declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
335declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
336declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
337declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
338declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
339declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
340
; Declarations for the sample.c.d.o 2D-array intrinsic in its float and
; <2 x float> return forms. Both take an extra i32 (passed %offset at the call
; sites) directly after the leading dmask operand.
341declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
342declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
343
; Attribute groups. Every intrinsic declaration visible in this file uses #1
; (nounwind readonly).
; NOTE(review): #0 and #2 do not appear to be referenced by anything shown
; here - confirm before removing them.
344attributes #0 = { nounwind }
345attributes #1 = { nounwind readonly }
346attributes #2 = { nounwind readnone }
347