1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3
; sample.d, 1D: calls llvm.amdgcn.image.sample.d.1d with dmask 15, the two half
; derivatives %dsdh/%dsdv and the float coordinate %s.
; The expected gfx1010 code loads the mask 0xffff into v3, computes
; s12 = s0 << 16, applies v_and_or_b32 (mask v3, s12) to v0 and v1, and issues
; image_sample_d_g16 with the address in v[0:2].
; NOTE(review): the upper half being taken from s0 presumably stands in for an
; undefined value - confirm before relying on it.
4define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
5; GFX10-LABEL: sample_d_1d:
6; GFX10:       ; %bb.0: ; %main_body
7; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
8; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
9; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
10; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
11; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
12; GFX10-NEXT:    s_waitcnt vmcnt(0)
13; GFX10-NEXT:    ; return to shader part epilog
14main_body:
15  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
16  ret <4 x float> %v
17}
18
; sample.d, 2D: calls llvm.amdgcn.image.sample.d.2d with dmask 15, four half
; derivatives (%dsdh, %dtdh, %dsdv, %dtdv) and two float coordinates (%s, %t).
; The expected gfx1010 code shifts v1 and v3 left by 16, merges them into v0 and
; v1 with v_and_or_b32 (mask 0xffff in v6), and issues image_sample_d_g16 with
; the bracketed address list [v0, v1, v4, v5].
19define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
20; GFX10-LABEL: sample_d_2d:
21; GFX10:       ; %bb.0: ; %main_body
22; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
23; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
24; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
25; GFX10-NEXT:    v_and_or_b32 v0, v0, v6, v1
26; GFX10-NEXT:    v_and_or_b32 v1, v2, v6, v3
27; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
28; GFX10-NEXT:    s_waitcnt vmcnt(0)
29; GFX10-NEXT:    ; return to shader part epilog
30main_body:
31  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
32  ret <4 x float> %v
33}
34
; sample.d, 3D: calls llvm.amdgcn.image.sample.d.3d with dmask 15, six half
; derivatives (%dsdh, %dtdh, %drdh, %dsdv, %dtdv, %drdv) and three float
; coordinates (%s, %t, %r).
; The expected gfx1010 code first copies v2/v3 to v9/v10, then builds v2..v5 with
; four v_and_or_b32 instructions (mask 0xffff in v11): two take a shifted VGPR
; as the OR operand and two take s12 = s0 << 16. The sample instruction uses the
; contiguous address range v[2:8].
; NOTE(review): the s12 operand presumably fills the unpaired upper half of the
; third derivative in each direction - confirm.
35define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
36; GFX10-LABEL: sample_d_3d:
37; GFX10:       ; %bb.0: ; %main_body
38; GFX10-NEXT:    v_mov_b32_e32 v9, v2
39; GFX10-NEXT:    v_mov_b32_e32 v10, v3
40; GFX10-NEXT:    v_mov_b32_e32 v11, 0xffff
41; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
42; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
43; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
44; GFX10-NEXT:    v_and_or_b32 v3, v9, v11, s12
45; GFX10-NEXT:    v_and_or_b32 v2, v0, v11, v1
46; GFX10-NEXT:    v_and_or_b32 v4, v10, v11, v4
47; GFX10-NEXT:    v_and_or_b32 v5, v5, v11, s12
48; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
49; GFX10-NEXT:    s_waitcnt vmcnt(0)
50; GFX10-NEXT:    ; return to shader part epilog
51main_body:
52  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
53  ret <4 x float> %v
54}
55
; sample.c.d, 1D: same shape as sample_d_1d but with a leading float %zcompare
; argument passed to llvm.amdgcn.image.sample.c.d.1d.
; The expected gfx1010 code leaves v0 untouched, applies v_and_or_b32 (mask
; 0xffff in v4, s12 = s0 << 16) to v1 and v2, and issues image_sample_c_d_g16
; with the address in v[0:3].
56define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
57; GFX10-LABEL: sample_c_d_1d:
58; GFX10:       ; %bb.0: ; %main_body
59; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
60; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
61; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
62; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
63; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
64; GFX10-NEXT:    s_waitcnt vmcnt(0)
65; GFX10-NEXT:    ; return to shader part epilog
66main_body:
67  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
68  ret <4 x float> %v
69}
70
; sample.c.d, 2D: calls llvm.amdgcn.image.sample.c.d.2d with dmask 15, a float
; %zcompare, four half derivatives and two float coordinates.
; The expected gfx1010 code shifts v2 and v4 left by 16, merges them into v1 and
; v2 with v_and_or_b32 (mask 0xffff in v7), and issues image_sample_c_d_g16 with
; the bracketed address list [v0, v1, v2, v5, v6].
71define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
72; GFX10-LABEL: sample_c_d_2d:
73; GFX10:       ; %bb.0: ; %main_body
74; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
75; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
76; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
77; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
78; GFX10-NEXT:    v_and_or_b32 v2, v3, v7, v4
79; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
80; GFX10-NEXT:    s_waitcnt vmcnt(0)
81; GFX10-NEXT:    ; return to shader part epilog
82main_body:
83  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
84  ret <4 x float> %v
85}
86
; sample.d.cl, 1D: same shape as sample_d_1d but with a trailing float %clamp
; argument passed to llvm.amdgcn.image.sample.d.cl.1d.
; The expected gfx1010 code applies v_and_or_b32 (mask 0xffff in v4,
; s12 = s0 << 16) to v0 and v1 and issues image_sample_d_cl_g16 with the address
; in v[0:3].
87define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
88; GFX10-LABEL: sample_d_cl_1d:
89; GFX10:       ; %bb.0: ; %main_body
90; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
91; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
92; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, s12
93; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
94; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
95; GFX10-NEXT:    s_waitcnt vmcnt(0)
96; GFX10-NEXT:    ; return to shader part epilog
97main_body:
98  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
99  ret <4 x float> %v
100}
101
; sample.d.cl, 2D: calls llvm.amdgcn.image.sample.d.cl.2d with dmask 15, four
; half derivatives, two float coordinates and a float %clamp.
; The expected gfx1010 code shifts v1 and v3 left by 16, merges them into v0 and
; v1 with v_and_or_b32 (mask 0xffff in v7), and issues image_sample_d_cl_g16
; with the bracketed address list [v0, v1, v4, v5, v6].
102define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
103; GFX10-LABEL: sample_d_cl_2d:
104; GFX10:       ; %bb.0: ; %main_body
105; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
106; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
107; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
108; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
109; GFX10-NEXT:    v_and_or_b32 v1, v2, v7, v3
110; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
111; GFX10-NEXT:    s_waitcnt vmcnt(0)
112; GFX10-NEXT:    ; return to shader part epilog
113main_body:
114  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
115  ret <4 x float> %v
116}
117
; sample.c.d.cl, 1D: calls llvm.amdgcn.image.sample.c.d.cl.1d with dmask 15, a
; float %zcompare, two half derivatives, a float coordinate and a float %clamp.
; The expected gfx1010 code applies v_and_or_b32 (mask 0xffff in v5,
; s12 = s0 << 16) to v1 and v2 and issues image_sample_c_d_cl_g16 with the
; address in v[0:4].
118define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
119; GFX10-LABEL: sample_c_d_cl_1d:
120; GFX10:       ; %bb.0: ; %main_body
121; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
122; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
123; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, s12
124; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, s12
125; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
126; GFX10-NEXT:    s_waitcnt vmcnt(0)
127; GFX10-NEXT:    ; return to shader part epilog
128main_body:
129  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
130  ret <4 x float> %v
131}
132
; sample.c.d.cl, 2D: calls llvm.amdgcn.image.sample.c.d.cl.2d with dmask 15, a
; float %zcompare, four half derivatives, two float coordinates and a float
; %clamp.
; Unlike the other 2D cases in this file, the expected gfx1010 code uses a
; contiguous address range v[2:7]: it copies v2, v3 and v0 to v8, v9 and v2,
; puts the 0xffff mask in v0, shifts v4 and v8 left by 16, and builds v4 and v3
; with v_and_or_b32 before issuing image_sample_c_d_cl_g16.
133define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
134; GFX10-LABEL: sample_c_d_cl_2d:
135; GFX10:       ; %bb.0: ; %main_body
136; GFX10-NEXT:    v_mov_b32_e32 v8, v2
137; GFX10-NEXT:    v_mov_b32_e32 v9, v3
138; GFX10-NEXT:    v_mov_b32_e32 v2, v0
139; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
140; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
141; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v8
142; GFX10-NEXT:    v_and_or_b32 v4, v9, v0, v4
143; GFX10-NEXT:    v_and_or_b32 v3, v1, v0, v3
144; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
145; GFX10-NEXT:    s_waitcnt vmcnt(0)
146; GFX10-NEXT:    ; return to shader part epilog
147main_body:
148  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
149  ret <4 x float> %v
150}
151
; sample.cd, 1D: calls llvm.amdgcn.image.sample.cd.1d with dmask 15, the two
; half derivatives %dsdh/%dsdv and the float coordinate %s.
; The expected gfx1010 code matches sample_d_1d except for the opcode: mask
; 0xffff in v3, s12 = s0 << 16, v_and_or_b32 on v0 and v1, then
; image_sample_cd_g16 with the address in v[0:2].
152define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
153; GFX10-LABEL: sample_cd_1d:
154; GFX10:       ; %bb.0: ; %main_body
155; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
156; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
157; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
158; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
159; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
160; GFX10-NEXT:    s_waitcnt vmcnt(0)
161; GFX10-NEXT:    ; return to shader part epilog
162main_body:
163  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
164  ret <4 x float> %v
165}
166
; sample.cd, 2D: calls llvm.amdgcn.image.sample.cd.2d with dmask 15, four half
; derivatives and two float coordinates.
; The expected gfx1010 code shifts v1 and v3 left by 16, merges them into v0 and
; v1 with v_and_or_b32 (mask 0xffff in v6), and issues image_sample_cd_g16 with
; the bracketed address list [v0, v1, v4, v5].
167define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
168; GFX10-LABEL: sample_cd_2d:
169; GFX10:       ; %bb.0: ; %main_body
170; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
171; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
172; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
173; GFX10-NEXT:    v_and_or_b32 v0, v0, v6, v1
174; GFX10-NEXT:    v_and_or_b32 v1, v2, v6, v3
175; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
176; GFX10-NEXT:    s_waitcnt vmcnt(0)
177; GFX10-NEXT:    ; return to shader part epilog
178main_body:
179  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
180  ret <4 x float> %v
181}
182
; sample.c.cd, 1D: calls llvm.amdgcn.image.sample.c.cd.1d with dmask 15, a float
; %zcompare, two half derivatives and a float coordinate.
; The expected gfx1010 code applies v_and_or_b32 (mask 0xffff in v4,
; s12 = s0 << 16) to v1 and v2 and issues image_sample_c_cd_g16 with the address
; in v[0:3].
183define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
184; GFX10-LABEL: sample_c_cd_1d:
185; GFX10:       ; %bb.0: ; %main_body
186; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
187; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
188; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
189; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
190; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
191; GFX10-NEXT:    s_waitcnt vmcnt(0)
192; GFX10-NEXT:    ; return to shader part epilog
193main_body:
194  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
195  ret <4 x float> %v
196}
197
; sample.c.cd, 2D: calls llvm.amdgcn.image.sample.c.cd.2d with dmask 15, a float
; %zcompare, four half derivatives and two float coordinates.
; The expected gfx1010 code shifts v2 and v4 left by 16, merges them into v1 and
; v2 with v_and_or_b32 (mask 0xffff in v7), and issues image_sample_c_cd_g16
; with the bracketed address list [v0, v1, v2, v5, v6].
198define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
199; GFX10-LABEL: sample_c_cd_2d:
200; GFX10:       ; %bb.0: ; %main_body
201; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
202; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
203; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
204; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
205; GFX10-NEXT:    v_and_or_b32 v2, v3, v7, v4
206; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
207; GFX10-NEXT:    s_waitcnt vmcnt(0)
208; GFX10-NEXT:    ; return to shader part epilog
209main_body:
210  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
211  ret <4 x float> %v
212}
213
; sample.cd.cl, 1D: calls llvm.amdgcn.image.sample.cd.cl.1d with dmask 15, two
; half derivatives, a float coordinate and a float %clamp.
; The expected gfx1010 code applies v_and_or_b32 (mask 0xffff in v4,
; s12 = s0 << 16) to v0 and v1 and issues image_sample_cd_cl_g16 with the
; address in v[0:3].
214define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
215; GFX10-LABEL: sample_cd_cl_1d:
216; GFX10:       ; %bb.0: ; %main_body
217; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
218; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
219; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, s12
220; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
221; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
222; GFX10-NEXT:    s_waitcnt vmcnt(0)
223; GFX10-NEXT:    ; return to shader part epilog
224main_body:
225  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
226  ret <4 x float> %v
227}
228
; sample.cd.cl, 2D: calls llvm.amdgcn.image.sample.cd.cl.2d with dmask 15, four
; half derivatives, two float coordinates and a float %clamp.
; The expected gfx1010 code shifts v1 and v3 left by 16, merges them into v0 and
; v1 with v_and_or_b32 (mask 0xffff in v7), and issues image_sample_cd_cl_g16
; with the bracketed address list [v0, v1, v4, v5, v6].
229define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
230; GFX10-LABEL: sample_cd_cl_2d:
231; GFX10:       ; %bb.0: ; %main_body
232; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
233; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
234; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
235; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
236; GFX10-NEXT:    v_and_or_b32 v1, v2, v7, v3
237; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
238; GFX10-NEXT:    s_waitcnt vmcnt(0)
239; GFX10-NEXT:    ; return to shader part epilog
240main_body:
241  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
242  ret <4 x float> %v
243}
244
; sample.c.cd.cl, 1D: calls llvm.amdgcn.image.sample.c.cd.cl.1d with dmask 15, a
; float %zcompare, two half derivatives, a float coordinate and a float %clamp.
; The expected gfx1010 code applies v_and_or_b32 (mask 0xffff in v5,
; s12 = s0 << 16) to v1 and v2 and issues image_sample_c_cd_cl_g16 with the
; address in v[0:4].
245define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
246; GFX10-LABEL: sample_c_cd_cl_1d:
247; GFX10:       ; %bb.0: ; %main_body
248; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
249; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
250; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, s12
251; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, s12
252; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
253; GFX10-NEXT:    s_waitcnt vmcnt(0)
254; GFX10-NEXT:    ; return to shader part epilog
255main_body:
256  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
257  ret <4 x float> %v
258}
259
; sample.c.cd.cl, 2D: calls llvm.amdgcn.image.sample.c.cd.cl.2d with dmask 15, a
; float %zcompare, four half derivatives, two float coordinates and a float
; %clamp.
; The expected gfx1010 code uses a contiguous address range v[2:7]: it copies
; v2, v3 and v0 to v8, v9 and v2, puts the 0xffff mask in v0, shifts v4 and v8
; left by 16, and builds v4 and v3 with v_and_or_b32 before issuing
; image_sample_c_cd_cl_g16.
260define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
261; GFX10-LABEL: sample_c_cd_cl_2d:
262; GFX10:       ; %bb.0: ; %main_body
263; GFX10-NEXT:    v_mov_b32_e32 v8, v2
264; GFX10-NEXT:    v_mov_b32_e32 v9, v3
265; GFX10-NEXT:    v_mov_b32_e32 v2, v0
266; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
267; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
268; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v8
269; GFX10-NEXT:    v_and_or_b32 v4, v9, v0, v4
270; GFX10-NEXT:    v_and_or_b32 v3, v1, v0, v3
271; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
272; GFX10-NEXT:    s_waitcnt vmcnt(0)
273; GFX10-NEXT:    ; return to shader part epilog
274main_body:
275  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
276  ret <4 x float> %v
277}
278
; sample.c.d.o, 2D array, single-float result: passes dmask 4, an i32 %offset, a
; float %zcompare, four half derivatives and three floats (%s, %t, %slice).
; The expected gfx1010 code moves the inputs so the address is the contiguous
; range v[2:8], builds v4 and v5 with v_and_or_b32 (mask 0xffff in v0), and
; issues image_sample_c_d_o_g16 with dmask:0x4 writing only v0.
; NOTE(review): the intrinsic name here ends in .f16.f32.f32, whereas the V2
; variant follows a <ret>.f16.f32 pattern (.v2f32.f16.f32) - confirm whether
; .f32.f16.f32 was intended; any rename must also update the declaration.
279define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
280; GFX10-LABEL: sample_c_d_o_2darray_V1:
281; GFX10:       ; %bb.0: ; %main_body
282; GFX10-NEXT:    v_mov_b32_e32 v9, v3
283; GFX10-NEXT:    v_mov_b32_e32 v10, v2
284; GFX10-NEXT:    v_mov_b32_e32 v11, v4
285; GFX10-NEXT:    v_mov_b32_e32 v2, v0
286; GFX10-NEXT:    v_mov_b32_e32 v3, v1
287; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
288; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v9
289; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
290; GFX10-NEXT:    v_and_or_b32 v4, v10, v0, v1
291; GFX10-NEXT:    v_and_or_b32 v5, v11, v0, v5
292; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
293; GFX10-NEXT:    s_waitcnt vmcnt(0)
294; GFX10-NEXT:    ; return to shader part epilog
295main_body:
296  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
297  ret float %v
298}
299
; sample.c.d.o, 2D array, two-float result: same arguments as the V1 variant but
; with dmask 6 and a <2 x float> return.
; The expected gfx1010 code has the same address setup as V1 (contiguous
; v[2:8], v4 and v5 built with v_and_or_b32 using mask 0xffff in v0) and issues
; image_sample_c_d_o_g16 with dmask:0x6 writing v[0:1].
300define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
301; GFX10-LABEL: sample_c_d_o_2darray_V2:
302; GFX10:       ; %bb.0: ; %main_body
303; GFX10-NEXT:    v_mov_b32_e32 v9, v3
304; GFX10-NEXT:    v_mov_b32_e32 v10, v2
305; GFX10-NEXT:    v_mov_b32_e32 v11, v4
306; GFX10-NEXT:    v_mov_b32_e32 v2, v0
307; GFX10-NEXT:    v_mov_b32_e32 v3, v1
308; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
309; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v9
310; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
311; GFX10-NEXT:    v_and_or_b32 v4, v10, v0, v1
312; GFX10-NEXT:    v_and_or_b32 v5, v11, v0, v5
313; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
314; GFX10-NEXT:    s_waitcnt vmcnt(0)
315; GFX10-NEXT:    ; return to shader part epilog
316main_body:
317  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
318  ret <2 x float> %v
319}
320
; Declarations of the sample.d family (d, c.d, d.cl, c.d.cl) used by the tests
; in this file. Each takes half-typed derivatives and float coordinates, returns
; <4 x float>, and carries attribute group #1.
321declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
322declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
323declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
324declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
325declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
326declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
327declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
328declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
329declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
330
; Declarations of the sample.cd family (cd, c.cd, cd.cl, c.cd.cl) used by the
; tests in this file. Signatures mirror the sample.d declarations: half-typed
; derivatives, float coordinates, <4 x float> result, attribute group #1.
331declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
332declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
333declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
334declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
335declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
336declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
337declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
338declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
339
; Declarations of the two sample.c.d.o.2darray variants used by the V1 (float
; result) and V2 (<2 x float> result) tests; both carry attribute group #1.
; NOTE(review): the float variant's name ends in .f16.f32.f32 while the
; <2 x float> variant uses .v2f32.f16.f32 - confirm whether .f32.f16.f32 was
; intended; any rename must also update the call in sample_c_d_o_2darray_V1.
340declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
341declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
342
; Attribute groups. Every intrinsic declaration visible in this file uses #1
; (nounwind readonly).
; NOTE(review): #0 and #2 are not referenced by anything visible here - confirm
; whether they are still needed.
343attributes #0 = { nounwind }
344attributes #1 = { nounwind readonly }
345attributes #2 = { nounwind readnone }
346