1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
6
; Scalar half D16 load from a 1D image with the first intrinsic operand (dmask) = 1.
; All four runs expect s2..s9 to be copied into s0..s7 (presumably the incoming
; inreg %rsrc) and s[0:7] used as the resource operand of a single-VGPR
; image_load carrying dmask:0x1 and the d16 modifier. The GFX10 run additionally
; expects dim:SQ_RSRC_IMG_1D on the instruction.
7define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
8; GFX8-UNPACKED-LABEL: load_1d_f16_x:
9; GFX8-UNPACKED:       ; %bb.0:
10; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
11; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
12; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
13; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
14; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
15; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
16; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
17; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
18; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
19; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
20; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
21;
22; GFX8-PACKED-LABEL: load_1d_f16_x:
23; GFX8-PACKED:       ; %bb.0:
24; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
25; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
26; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
27; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
28; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
29; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
30; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
31; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
32; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
33; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
34; GFX8-PACKED-NEXT:    ; return to shader part epilog
35;
36; GFX9-LABEL: load_1d_f16_x:
37; GFX9:       ; %bb.0:
38; GFX9-NEXT:    s_mov_b32 s0, s2
39; GFX9-NEXT:    s_mov_b32 s1, s3
40; GFX9-NEXT:    s_mov_b32 s2, s4
41; GFX9-NEXT:    s_mov_b32 s3, s5
42; GFX9-NEXT:    s_mov_b32 s4, s6
43; GFX9-NEXT:    s_mov_b32 s5, s7
44; GFX9-NEXT:    s_mov_b32 s6, s8
45; GFX9-NEXT:    s_mov_b32 s7, s9
46; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
47; GFX9-NEXT:    s_waitcnt vmcnt(0)
48; GFX9-NEXT:    ; return to shader part epilog
49;
50; GFX10-LABEL: load_1d_f16_x:
51; GFX10:       ; %bb.0:
52; GFX10-NEXT:    s_mov_b32 s0, s2
53; GFX10-NEXT:    s_mov_b32 s1, s3
54; GFX10-NEXT:    s_mov_b32 s2, s4
55; GFX10-NEXT:    s_mov_b32 s3, s5
56; GFX10-NEXT:    s_mov_b32 s4, s6
57; GFX10-NEXT:    s_mov_b32 s5, s7
58; GFX10-NEXT:    s_mov_b32 s6, s8
59; GFX10-NEXT:    s_mov_b32 s7, s9
60; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
61; GFX10-NEXT:    s_waitcnt vmcnt(0)
62; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (1), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
63  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
64  ret half %v
65}
66
; Scalar half D16 load from a 1D image with the first intrinsic operand (dmask) = 2.
; All four runs expect s2..s9 to be copied into s0..s7 (presumably the incoming
; inreg %rsrc) followed by a single-VGPR image_load with dmask:0x2 and the d16
; modifier. The GFX10 run additionally expects dim:SQ_RSRC_IMG_1D.
67define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
68; GFX8-UNPACKED-LABEL: load_1d_f16_y:
69; GFX8-UNPACKED:       ; %bb.0:
70; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
71; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
72; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
73; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
74; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
75; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
76; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
77; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
78; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
79; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
80; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
81;
82; GFX8-PACKED-LABEL: load_1d_f16_y:
83; GFX8-PACKED:       ; %bb.0:
84; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
85; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
86; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
87; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
88; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
89; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
90; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
91; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
92; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
93; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
94; GFX8-PACKED-NEXT:    ; return to shader part epilog
95;
96; GFX9-LABEL: load_1d_f16_y:
97; GFX9:       ; %bb.0:
98; GFX9-NEXT:    s_mov_b32 s0, s2
99; GFX9-NEXT:    s_mov_b32 s1, s3
100; GFX9-NEXT:    s_mov_b32 s2, s4
101; GFX9-NEXT:    s_mov_b32 s3, s5
102; GFX9-NEXT:    s_mov_b32 s4, s6
103; GFX9-NEXT:    s_mov_b32 s5, s7
104; GFX9-NEXT:    s_mov_b32 s6, s8
105; GFX9-NEXT:    s_mov_b32 s7, s9
106; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
107; GFX9-NEXT:    s_waitcnt vmcnt(0)
108; GFX9-NEXT:    ; return to shader part epilog
109;
110; GFX10-LABEL: load_1d_f16_y:
111; GFX10:       ; %bb.0:
112; GFX10-NEXT:    s_mov_b32 s0, s2
113; GFX10-NEXT:    s_mov_b32 s1, s3
114; GFX10-NEXT:    s_mov_b32 s2, s4
115; GFX10-NEXT:    s_mov_b32 s3, s5
116; GFX10-NEXT:    s_mov_b32 s4, s6
117; GFX10-NEXT:    s_mov_b32 s5, s7
118; GFX10-NEXT:    s_mov_b32 s6, s8
119; GFX10-NEXT:    s_mov_b32 s7, s9
120; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
121; GFX10-NEXT:    s_waitcnt vmcnt(0)
122; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (2), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
123  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
124  ret half %v
125}
126
; Scalar half D16 load from a 1D image with the first intrinsic operand (dmask) = 4.
; All four runs expect s2..s9 to be copied into s0..s7 (presumably the incoming
; inreg %rsrc) followed by a single-VGPR image_load with dmask:0x4 and the d16
; modifier. The GFX10 run additionally expects dim:SQ_RSRC_IMG_1D.
127define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
128; GFX8-UNPACKED-LABEL: load_1d_f16_z:
129; GFX8-UNPACKED:       ; %bb.0:
130; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
131; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
132; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
133; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
134; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
135; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
136; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
137; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
138; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
139; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
140; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
141;
142; GFX8-PACKED-LABEL: load_1d_f16_z:
143; GFX8-PACKED:       ; %bb.0:
144; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
145; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
146; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
147; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
148; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
149; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
150; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
151; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
152; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
153; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
154; GFX8-PACKED-NEXT:    ; return to shader part epilog
155;
156; GFX9-LABEL: load_1d_f16_z:
157; GFX9:       ; %bb.0:
158; GFX9-NEXT:    s_mov_b32 s0, s2
159; GFX9-NEXT:    s_mov_b32 s1, s3
160; GFX9-NEXT:    s_mov_b32 s2, s4
161; GFX9-NEXT:    s_mov_b32 s3, s5
162; GFX9-NEXT:    s_mov_b32 s4, s6
163; GFX9-NEXT:    s_mov_b32 s5, s7
164; GFX9-NEXT:    s_mov_b32 s6, s8
165; GFX9-NEXT:    s_mov_b32 s7, s9
166; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
167; GFX9-NEXT:    s_waitcnt vmcnt(0)
168; GFX9-NEXT:    ; return to shader part epilog
169;
170; GFX10-LABEL: load_1d_f16_z:
171; GFX10:       ; %bb.0:
172; GFX10-NEXT:    s_mov_b32 s0, s2
173; GFX10-NEXT:    s_mov_b32 s1, s3
174; GFX10-NEXT:    s_mov_b32 s2, s4
175; GFX10-NEXT:    s_mov_b32 s3, s5
176; GFX10-NEXT:    s_mov_b32 s4, s6
177; GFX10-NEXT:    s_mov_b32 s5, s7
178; GFX10-NEXT:    s_mov_b32 s6, s8
179; GFX10-NEXT:    s_mov_b32 s7, s9
180; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
181; GFX10-NEXT:    s_waitcnt vmcnt(0)
182; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (4), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
183  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
184  ret half %v
185}
186
; Scalar half D16 load from a 1D image with the first intrinsic operand (dmask) = 8.
; All four runs expect s2..s9 to be copied into s0..s7 (presumably the incoming
; inreg %rsrc) followed by a single-VGPR image_load with dmask:0x8 and the d16
; modifier. The GFX10 run additionally expects dim:SQ_RSRC_IMG_1D.
187define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
188; GFX8-UNPACKED-LABEL: load_1d_f16_w:
189; GFX8-UNPACKED:       ; %bb.0:
190; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
191; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
192; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
193; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
194; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
195; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
196; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
197; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
198; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
199; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
200; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
201;
202; GFX8-PACKED-LABEL: load_1d_f16_w:
203; GFX8-PACKED:       ; %bb.0:
204; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
205; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
206; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
207; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
208; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
209; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
210; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
211; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
212; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
213; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
214; GFX8-PACKED-NEXT:    ; return to shader part epilog
215;
216; GFX9-LABEL: load_1d_f16_w:
217; GFX9:       ; %bb.0:
218; GFX9-NEXT:    s_mov_b32 s0, s2
219; GFX9-NEXT:    s_mov_b32 s1, s3
220; GFX9-NEXT:    s_mov_b32 s2, s4
221; GFX9-NEXT:    s_mov_b32 s3, s5
222; GFX9-NEXT:    s_mov_b32 s4, s6
223; GFX9-NEXT:    s_mov_b32 s5, s7
224; GFX9-NEXT:    s_mov_b32 s6, s8
225; GFX9-NEXT:    s_mov_b32 s7, s9
226; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
227; GFX9-NEXT:    s_waitcnt vmcnt(0)
228; GFX9-NEXT:    ; return to shader part epilog
229;
230; GFX10-LABEL: load_1d_f16_w:
231; GFX10:       ; %bb.0:
232; GFX10-NEXT:    s_mov_b32 s0, s2
233; GFX10-NEXT:    s_mov_b32 s1, s3
234; GFX10-NEXT:    s_mov_b32 s2, s4
235; GFX10-NEXT:    s_mov_b32 s3, s5
236; GFX10-NEXT:    s_mov_b32 s4, s6
237; GFX10-NEXT:    s_mov_b32 s5, s7
238; GFX10-NEXT:    s_mov_b32 s6, s8
239; GFX10-NEXT:    s_mov_b32 s7, s9
240; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
241; GFX10-NEXT:    s_waitcnt vmcnt(0)
242; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (8), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
243  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
244  ret half %v
245}
246
; <2 x half> D16 load from a 1D image with the first intrinsic operand (dmask) = 3.
; The GFX8-UNPACKED run expects a two-VGPR result (v[0:1]) that is then merged
; into v0: v1 is masked with 0xffff, shifted left by 16 and OR-ed with the low
; word of v0. The other three runs expect the image_load to write v0 directly
; with no follow-up repacking. The GFX10 run also expects dim:SQ_RSRC_IMG_1D.
247define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
248; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
249; GFX8-UNPACKED:       ; %bb.0:
250; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
251; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
252; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
253; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
254; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
255; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
256; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
257; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
258; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
259; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
260; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
261; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
262; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
263; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
264;
265; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
266; GFX8-PACKED:       ; %bb.0:
267; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
268; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
269; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
270; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
271; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
272; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
273; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
274; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
275; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
276; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
277; GFX8-PACKED-NEXT:    ; return to shader part epilog
278;
279; GFX9-LABEL: load_1d_v2f16_xy:
280; GFX9:       ; %bb.0:
281; GFX9-NEXT:    s_mov_b32 s0, s2
282; GFX9-NEXT:    s_mov_b32 s1, s3
283; GFX9-NEXT:    s_mov_b32 s2, s4
284; GFX9-NEXT:    s_mov_b32 s3, s5
285; GFX9-NEXT:    s_mov_b32 s4, s6
286; GFX9-NEXT:    s_mov_b32 s5, s7
287; GFX9-NEXT:    s_mov_b32 s6, s8
288; GFX9-NEXT:    s_mov_b32 s7, s9
289; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
290; GFX9-NEXT:    s_waitcnt vmcnt(0)
291; GFX9-NEXT:    ; return to shader part epilog
292;
293; GFX10-LABEL: load_1d_v2f16_xy:
294; GFX10:       ; %bb.0:
295; GFX10-NEXT:    s_mov_b32 s0, s2
296; GFX10-NEXT:    s_mov_b32 s1, s3
297; GFX10-NEXT:    s_mov_b32 s2, s4
298; GFX10-NEXT:    s_mov_b32 s3, s5
299; GFX10-NEXT:    s_mov_b32 s4, s6
300; GFX10-NEXT:    s_mov_b32 s5, s7
301; GFX10-NEXT:    s_mov_b32 s6, s8
302; GFX10-NEXT:    s_mov_b32 s7, s9
303; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
304; GFX10-NEXT:    s_waitcnt vmcnt(0)
305; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (3), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
306  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
307  ret <2 x half> %v
308}
309
; <2 x half> D16 load from a 1D image with a non-contiguous dmask of 5.
; The GFX8-UNPACKED run expects a two-VGPR result (v[0:1]) merged into v0 via
; and / shift-left-16 / or. The other three runs expect the image_load to write
; v0 directly with no repacking. The GFX10 run also expects dim:SQ_RSRC_IMG_1D.
310define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
311; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
312; GFX8-UNPACKED:       ; %bb.0:
313; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
314; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
315; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
316; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
317; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
318; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
319; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
320; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
321; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
322; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
323; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
324; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
325; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
326; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
327;
328; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
329; GFX8-PACKED:       ; %bb.0:
330; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
331; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
332; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
333; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
334; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
335; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
336; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
337; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
338; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
339; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
340; GFX8-PACKED-NEXT:    ; return to shader part epilog
341;
342; GFX9-LABEL: load_1d_v2f16_xz:
343; GFX9:       ; %bb.0:
344; GFX9-NEXT:    s_mov_b32 s0, s2
345; GFX9-NEXT:    s_mov_b32 s1, s3
346; GFX9-NEXT:    s_mov_b32 s2, s4
347; GFX9-NEXT:    s_mov_b32 s3, s5
348; GFX9-NEXT:    s_mov_b32 s4, s6
349; GFX9-NEXT:    s_mov_b32 s5, s7
350; GFX9-NEXT:    s_mov_b32 s6, s8
351; GFX9-NEXT:    s_mov_b32 s7, s9
352; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
353; GFX9-NEXT:    s_waitcnt vmcnt(0)
354; GFX9-NEXT:    ; return to shader part epilog
355;
356; GFX10-LABEL: load_1d_v2f16_xz:
357; GFX10:       ; %bb.0:
358; GFX10-NEXT:    s_mov_b32 s0, s2
359; GFX10-NEXT:    s_mov_b32 s1, s3
360; GFX10-NEXT:    s_mov_b32 s2, s4
361; GFX10-NEXT:    s_mov_b32 s3, s5
362; GFX10-NEXT:    s_mov_b32 s4, s6
363; GFX10-NEXT:    s_mov_b32 s5, s7
364; GFX10-NEXT:    s_mov_b32 s6, s8
365; GFX10-NEXT:    s_mov_b32 s7, s9
366; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
367; GFX10-NEXT:    s_waitcnt vmcnt(0)
368; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (5), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
369  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
370  ret <2 x half> %v
371}
372
; <2 x half> D16 load from a 1D image with a non-contiguous dmask of 9.
; The GFX8-UNPACKED run expects a two-VGPR result (v[0:1]) merged into v0 via
; and / shift-left-16 / or. The other three runs expect the image_load to write
; v0 directly with no repacking. The GFX10 run also expects dim:SQ_RSRC_IMG_1D.
373define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
374; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
375; GFX8-UNPACKED:       ; %bb.0:
376; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
377; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
378; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
379; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
380; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
381; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
382; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
383; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
384; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
385; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
386; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
387; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
388; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
389; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
390;
391; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
392; GFX8-PACKED:       ; %bb.0:
393; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
394; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
395; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
396; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
397; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
398; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
399; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
400; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
401; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
402; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
403; GFX8-PACKED-NEXT:    ; return to shader part epilog
404;
405; GFX9-LABEL: load_1d_v2f16_xw:
406; GFX9:       ; %bb.0:
407; GFX9-NEXT:    s_mov_b32 s0, s2
408; GFX9-NEXT:    s_mov_b32 s1, s3
409; GFX9-NEXT:    s_mov_b32 s2, s4
410; GFX9-NEXT:    s_mov_b32 s3, s5
411; GFX9-NEXT:    s_mov_b32 s4, s6
412; GFX9-NEXT:    s_mov_b32 s5, s7
413; GFX9-NEXT:    s_mov_b32 s6, s8
414; GFX9-NEXT:    s_mov_b32 s7, s9
415; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
416; GFX9-NEXT:    s_waitcnt vmcnt(0)
417; GFX9-NEXT:    ; return to shader part epilog
418;
419; GFX10-LABEL: load_1d_v2f16_xw:
420; GFX10:       ; %bb.0:
421; GFX10-NEXT:    s_mov_b32 s0, s2
422; GFX10-NEXT:    s_mov_b32 s1, s3
423; GFX10-NEXT:    s_mov_b32 s2, s4
424; GFX10-NEXT:    s_mov_b32 s3, s5
425; GFX10-NEXT:    s_mov_b32 s4, s6
426; GFX10-NEXT:    s_mov_b32 s5, s7
427; GFX10-NEXT:    s_mov_b32 s6, s8
428; GFX10-NEXT:    s_mov_b32 s7, s9
429; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
430; GFX10-NEXT:    s_waitcnt vmcnt(0)
431; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (9), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
432  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
433  ret <2 x half> %v
434}
435
; <2 x half> D16 load from a 1D image with dmask 6 (lowest mask bit clear).
; The GFX8-UNPACKED run expects a two-VGPR result (v[0:1]) merged into v0 via
; and / shift-left-16 / or. The other three runs expect the image_load to write
; v0 directly with no repacking. The GFX10 run also expects dim:SQ_RSRC_IMG_1D.
436define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
437; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
438; GFX8-UNPACKED:       ; %bb.0:
439; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
440; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
441; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
442; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
443; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
444; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
445; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
446; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
447; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
448; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
449; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
450; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
451; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
452; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
453;
454; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
455; GFX8-PACKED:       ; %bb.0:
456; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
457; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
458; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
459; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
460; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
461; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
462; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
463; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
464; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
465; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
466; GFX8-PACKED-NEXT:    ; return to shader part epilog
467;
468; GFX9-LABEL: load_1d_v2f16_yz:
469; GFX9:       ; %bb.0:
470; GFX9-NEXT:    s_mov_b32 s0, s2
471; GFX9-NEXT:    s_mov_b32 s1, s3
472; GFX9-NEXT:    s_mov_b32 s2, s4
473; GFX9-NEXT:    s_mov_b32 s3, s5
474; GFX9-NEXT:    s_mov_b32 s4, s6
475; GFX9-NEXT:    s_mov_b32 s5, s7
476; GFX9-NEXT:    s_mov_b32 s6, s8
477; GFX9-NEXT:    s_mov_b32 s7, s9
478; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
479; GFX9-NEXT:    s_waitcnt vmcnt(0)
480; GFX9-NEXT:    ; return to shader part epilog
481;
482; GFX10-LABEL: load_1d_v2f16_yz:
483; GFX10:       ; %bb.0:
484; GFX10-NEXT:    s_mov_b32 s0, s2
485; GFX10-NEXT:    s_mov_b32 s1, s3
486; GFX10-NEXT:    s_mov_b32 s2, s4
487; GFX10-NEXT:    s_mov_b32 s3, s5
488; GFX10-NEXT:    s_mov_b32 s4, s6
489; GFX10-NEXT:    s_mov_b32 s5, s7
490; GFX10-NEXT:    s_mov_b32 s6, s8
491; GFX10-NEXT:    s_mov_b32 s7, s9
492; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
493; GFX10-NEXT:    s_waitcnt vmcnt(0)
494; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (6), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
495  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
496  ret <2 x half> %v
497}
498
; <3 x half> D16 load from a 1D image with the first intrinsic operand (dmask) = 7.
; The GFX8-UNPACKED run expects a three-VGPR result (v[0:2]); the other three
; runs expect two VGPRs (v[0:1]). Every run then expects extra ALU work that
; rebuilds the two returned dwords v0 and v1 from 16-bit pieces.
; NOTE(review): in each run the upper 16 bits of v1 are filled from a scalar
; value derived from s0 rather than from loaded data. Presumably this is the
; undefined padding lane of the 3-element result, so its contents are
; don't-care; confirm before relying on these exact sequences.
499define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
500; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
501; GFX8-UNPACKED:       ; %bb.0:
502; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
503; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
504; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
505; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
506; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
507; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
508; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
509; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
510; GFX8-UNPACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
511; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
512; GFX8-UNPACKED-NEXT:    s_and_b32 s1, s0, s0
513; GFX8-UNPACKED-NEXT:    s_lshl_b32 s1, s1, 16
514; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, s1
515; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
516; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v4, s0, v1
517; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
518; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
519; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
520; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
521;
522; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
523; GFX8-PACKED:       ; %bb.0:
524; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
525; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
526; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
527; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
528; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
529; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
530; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
531; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
532; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
533; GFX8-PACKED-NEXT:    s_mov_b32 s0, 0xffff
534; GFX8-PACKED-NEXT:    s_and_b32 s0, s0, s0
535; GFX8-PACKED-NEXT:    s_lshl_b32 s0, s0, 16
536; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, s0
537; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
538; GFX8-PACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
539; GFX8-PACKED-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
540; GFX8-PACKED-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
541; GFX8-PACKED-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
542; GFX8-PACKED-NEXT:    ; return to shader part epilog
543;
544; GFX9-LABEL: load_1d_v3f16_xyz:
545; GFX9:       ; %bb.0:
546; GFX9-NEXT:    s_mov_b32 s0, s2
547; GFX9-NEXT:    s_mov_b32 s1, s3
548; GFX9-NEXT:    s_mov_b32 s2, s4
549; GFX9-NEXT:    s_mov_b32 s3, s5
550; GFX9-NEXT:    s_mov_b32 s4, s6
551; GFX9-NEXT:    s_mov_b32 s5, s7
552; GFX9-NEXT:    s_mov_b32 s6, s8
553; GFX9-NEXT:    s_mov_b32 s7, s9
554; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
555; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
556; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
557; GFX9-NEXT:    s_waitcnt vmcnt(0)
558; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
559; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
560; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s0
561; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v3
562; GFX9-NEXT:    ; return to shader part epilog
563;
564; GFX10-LABEL: load_1d_v3f16_xyz:
565; GFX10:       ; %bb.0:
566; GFX10-NEXT:    s_mov_b32 s0, s2
567; GFX10-NEXT:    s_mov_b32 s1, s3
568; GFX10-NEXT:    s_mov_b32 s2, s4
569; GFX10-NEXT:    s_mov_b32 s3, s5
570; GFX10-NEXT:    s_mov_b32 s4, s6
571; GFX10-NEXT:    s_mov_b32 s5, s7
572; GFX10-NEXT:    s_mov_b32 s6, s8
573; GFX10-NEXT:    s_mov_b32 s7, s9
574; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
575; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
576; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
577; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
578; GFX10-NEXT:    s_waitcnt vmcnt(0)
579; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
580; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s0
581; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
582; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, v2
583; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (7), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
584  %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
585  ret <3 x half> %v
586}
587
; <4 x half> D16 load from a 1D image with the first intrinsic operand (dmask) = 15.
; The GFX8-UNPACKED run expects a four-VGPR result (v[0:3]) that is merged
; pairwise into v0 and v1: the odd registers are masked with 0xffff, shifted
; left by 16 and OR-ed with the low word of the even ones. The other three runs
; expect the image_load to write v[0:1] directly with no repacking. The GFX10
; run also expects dim:SQ_RSRC_IMG_1D.
588define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
589; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
590; GFX8-UNPACKED:       ; %bb.0:
591; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
592; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
593; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
594; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
595; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
596; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
597; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
598; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
599; GFX8-UNPACKED-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
600; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
601; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
602; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, s0, v1
603; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v3, s0, v3
604; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
605; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
606; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
607; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
608; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
609;
610; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
611; GFX8-PACKED:       ; %bb.0:
612; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
613; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
614; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
615; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
616; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
617; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
618; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
619; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
620; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
621; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
622; GFX8-PACKED-NEXT:    ; return to shader part epilog
623;
624; GFX9-LABEL: load_1d_v4f16_xyzw:
625; GFX9:       ; %bb.0:
626; GFX9-NEXT:    s_mov_b32 s0, s2
627; GFX9-NEXT:    s_mov_b32 s1, s3
628; GFX9-NEXT:    s_mov_b32 s2, s4
629; GFX9-NEXT:    s_mov_b32 s3, s5
630; GFX9-NEXT:    s_mov_b32 s4, s6
631; GFX9-NEXT:    s_mov_b32 s5, s7
632; GFX9-NEXT:    s_mov_b32 s6, s8
633; GFX9-NEXT:    s_mov_b32 s7, s9
634; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
635; GFX9-NEXT:    s_waitcnt vmcnt(0)
636; GFX9-NEXT:    ; return to shader part epilog
637;
638; GFX10-LABEL: load_1d_v4f16_xyzw:
639; GFX10:       ; %bb.0:
640; GFX10-NEXT:    s_mov_b32 s0, s2
641; GFX10-NEXT:    s_mov_b32 s1, s3
642; GFX10-NEXT:    s_mov_b32 s2, s4
643; GFX10-NEXT:    s_mov_b32 s3, s5
644; GFX10-NEXT:    s_mov_b32 s4, s6
645; GFX10-NEXT:    s_mov_b32 s5, s7
646; GFX10-NEXT:    s_mov_b32 s6, s8
647; GFX10-NEXT:    s_mov_b32 s7, s9
648; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
649; GFX10-NEXT:    s_waitcnt vmcnt(0)
650; GFX10-NEXT:    ; return to shader part epilog
  ; Operands as written: dmask (15), coordinate %s, resource %rsrc, then two zero
  ; immediates (presumably texfailctrl and cachepolicy; confirm against the
  ; intrinsic definition).
651  %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
652  ret <4 x half> %v
653}
654
; Scalar half D16 load with dmask 1 through the { half, i32 } struct-returning
; form of the intrinsic. Only the i32 member (index 1) is used: it is bitcast
; to float and returned, and the loaded half itself is discarded.
; Every run expects v1 to be zeroed and copied to v2 before the load, an
; image_load into the pair v[1:2] carrying the tfe and d16 modifiers, and v2
; moved into v0 as the return value. The GFX10 run also expects
; dim:SQ_RSRC_IMG_1D.
655define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
656; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
657; GFX8-UNPACKED:       ; %bb.0:
658; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
659; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
660; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
661; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
662; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
663; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
664; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
665; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
666; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
667; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
668; GFX8-UNPACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
669; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
670; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
671; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
672;
673; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
674; GFX8-PACKED:       ; %bb.0:
675; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
676; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
677; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
678; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
679; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
680; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
681; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
682; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
683; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
684; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
685; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
686; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
687; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
688; GFX8-PACKED-NEXT:    ; return to shader part epilog
689;
690; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
691; GFX9:       ; %bb.0:
692; GFX9-NEXT:    v_mov_b32_e32 v1, 0
693; GFX9-NEXT:    s_mov_b32 s0, s2
694; GFX9-NEXT:    s_mov_b32 s1, s3
695; GFX9-NEXT:    s_mov_b32 s2, s4
696; GFX9-NEXT:    s_mov_b32 s3, s5
697; GFX9-NEXT:    s_mov_b32 s4, s6
698; GFX9-NEXT:    s_mov_b32 s5, s7
699; GFX9-NEXT:    s_mov_b32 s6, s8
700; GFX9-NEXT:    s_mov_b32 s7, s9
701; GFX9-NEXT:    v_mov_b32_e32 v2, v1
702; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
703; GFX9-NEXT:    s_waitcnt vmcnt(0)
704; GFX9-NEXT:    v_mov_b32_e32 v0, v2
705; GFX9-NEXT:    ; return to shader part epilog
706;
707; GFX10-LABEL: load_1d_f16_tfe_dmask_x:
708; GFX10:       ; %bb.0:
709; GFX10-NEXT:    v_mov_b32_e32 v1, 0
710; GFX10-NEXT:    s_mov_b32 s0, s2
711; GFX10-NEXT:    s_mov_b32 s1, s3
712; GFX10-NEXT:    s_mov_b32 s2, s4
713; GFX10-NEXT:    s_mov_b32 s3, s5
714; GFX10-NEXT:    s_mov_b32 s4, s6
715; GFX10-NEXT:    s_mov_b32 s5, s7
716; GFX10-NEXT:    s_mov_b32 s6, s8
717; GFX10-NEXT:    s_mov_b32 s7, s9
718; GFX10-NEXT:    v_mov_b32_e32 v2, v1
719; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
720; GFX10-NEXT:    s_waitcnt vmcnt(0)
721; GFX10-NEXT:    v_mov_b32_e32 v0, v2
722; GFX10-NEXT:    ; return to shader part epilog
  ; The operand following %rsrc is 1 here (presumably texfailctrl with the TFE
  ; bit set; confirm against the intrinsic definition); the last operand is 0.
723  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  ; Keep only the i32 member of the result and return its bits as a float.
724  %v.err = extractvalue { half, i32 } %v, 1
725  %vv = bitcast i32 %v.err to float
726  ret float %vv
727}
728
; 1D d16 image load of <2 x half> (dmask 0x3) through the struct-returning
; intrinsic variant. Only the trailing i32 member of the result is used: it is
; bitcast to float and returned, so the checks cover the register that follows
; the data registers (v3 when d16 data is unpacked, v2 when it is packed).
define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v2f16_tfe_dmask_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    ; return to shader part epilog
  %load = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %status = extractvalue { <2 x half>, i32 } %load, 1
  %status.f32 = bitcast i32 %status to float
  ret float %status.f32
}
803
; 1D d16 image load of <3 x half> (dmask 0x7) through the struct-returning
; intrinsic variant. Only the trailing i32 member of the result is used: it is
; bitcast to float and returned, so the checks cover the register that follows
; the data registers (v4 when d16 data is unpacked, v3 when it is packed).
define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v4, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v4
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-PACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f16_tfe_dmask_xyz:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v3
; GFX10-NEXT:    ; return to shader part epilog
  %load = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %status = extractvalue { <3 x half>, i32 } %load, 1
  %status.f32 = bitcast i32 %status to float
  ret float %status.f32
}
882
; 1D d16 image load of <4 x half> with all four components enabled, through the
; struct-returning intrinsic variant. Only the trailing i32 member of the
; result is used: it is bitcast to float and returned.
;
; The dmask operand must be 15 (0xf) to select x, y, z and w; the value 16
; (0x10) has none of the four component bits set and does not match the
; "xyzw" case this test is named for. With four components the result needs
; 4 data + 1 status registers when d16 data is unpacked and 2 data + 1 status
; registers when it is packed.
; NOTE(review): the assertions below were updated for the corrected dmask;
; re-run utils/update_llc_test_checks.py to confirm them against llc.
define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-UNPACKED:       ; %bb.0:
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v4, v1
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v5, v1
; GFX8-UNPACKED-NEXT:    image_load v[1:5], v0, s[0:7] dmask:0xf unorm tfe d16
; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v5
; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
;
; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX8-PACKED:       ; %bb.0:
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-PACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0xf unorm tfe d16
; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v3
; GFX8-PACKED-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v2, v1
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0xf unorm tfe d16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, v3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe d16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v3
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue { <4 x half>, i32 } %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}
956
957declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
958declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
959declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
960declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
961
962declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
963declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
964declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
965declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
966
967attributes #0 = { nounwind readonly }
968