1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
6
; Scalar half image load with dmask = 1.
; Every run expects a single image_load into v0 with dmask:0x1 and the d16
; modifier; the gfx1010 run additionally expects dim:SQ_RSRC_IMG_1D.
7define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
8; GFX8-UNPACKED-LABEL: load_1d_f16_x:
9; GFX8-UNPACKED:       ; %bb.0:
10; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
11; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
12; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
13; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
14; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
15; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
16; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
17; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
18; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
19; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
20; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
21;
22; GFX8-PACKED-LABEL: load_1d_f16_x:
23; GFX8-PACKED:       ; %bb.0:
24; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
25; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
26; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
27; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
28; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
29; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
30; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
31; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
32; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
33; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
34; GFX8-PACKED-NEXT:    ; return to shader part epilog
35;
36; GFX9-LABEL: load_1d_f16_x:
37; GFX9:       ; %bb.0:
38; GFX9-NEXT:    s_mov_b32 s0, s2
39; GFX9-NEXT:    s_mov_b32 s1, s3
40; GFX9-NEXT:    s_mov_b32 s2, s4
41; GFX9-NEXT:    s_mov_b32 s3, s5
42; GFX9-NEXT:    s_mov_b32 s4, s6
43; GFX9-NEXT:    s_mov_b32 s5, s7
44; GFX9-NEXT:    s_mov_b32 s6, s8
45; GFX9-NEXT:    s_mov_b32 s7, s9
46; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
47; GFX9-NEXT:    s_waitcnt vmcnt(0)
48; GFX9-NEXT:    ; return to shader part epilog
49;
50; GFX10-LABEL: load_1d_f16_x:
51; GFX10:       ; %bb.0:
52; GFX10-NEXT:    s_mov_b32 s0, s2
53; GFX10-NEXT:    s_mov_b32 s1, s3
54; GFX10-NEXT:    s_mov_b32 s2, s4
55; GFX10-NEXT:    s_mov_b32 s3, s5
56; GFX10-NEXT:    s_mov_b32 s4, s6
57; GFX10-NEXT:    s_mov_b32 s5, s7
58; GFX10-NEXT:    s_mov_b32 s6, s8
59; GFX10-NEXT:    s_mov_b32 s7, s9
60; GFX10-NEXT:    ; implicit-def: $vcc_hi
61; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
62; GFX10-NEXT:    s_waitcnt vmcnt(0)
63; GFX10-NEXT:    ; return to shader part epilog
64  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
65  ret half %v
66}
67
; Scalar half image load with dmask = 2.
; Same expected code shape as the dmask = 1 case; only the dmask operand of
; the expected image_load differs (0x2).
68define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
69; GFX8-UNPACKED-LABEL: load_1d_f16_y:
70; GFX8-UNPACKED:       ; %bb.0:
71; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
72; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
73; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
74; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
75; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
76; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
77; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
78; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
79; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
80; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
81; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
82;
83; GFX8-PACKED-LABEL: load_1d_f16_y:
84; GFX8-PACKED:       ; %bb.0:
85; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
86; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
87; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
88; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
89; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
90; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
91; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
92; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
93; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
94; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
95; GFX8-PACKED-NEXT:    ; return to shader part epilog
96;
97; GFX9-LABEL: load_1d_f16_y:
98; GFX9:       ; %bb.0:
99; GFX9-NEXT:    s_mov_b32 s0, s2
100; GFX9-NEXT:    s_mov_b32 s1, s3
101; GFX9-NEXT:    s_mov_b32 s2, s4
102; GFX9-NEXT:    s_mov_b32 s3, s5
103; GFX9-NEXT:    s_mov_b32 s4, s6
104; GFX9-NEXT:    s_mov_b32 s5, s7
105; GFX9-NEXT:    s_mov_b32 s6, s8
106; GFX9-NEXT:    s_mov_b32 s7, s9
107; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
108; GFX9-NEXT:    s_waitcnt vmcnt(0)
109; GFX9-NEXT:    ; return to shader part epilog
110;
111; GFX10-LABEL: load_1d_f16_y:
112; GFX10:       ; %bb.0:
113; GFX10-NEXT:    s_mov_b32 s0, s2
114; GFX10-NEXT:    s_mov_b32 s1, s3
115; GFX10-NEXT:    s_mov_b32 s2, s4
116; GFX10-NEXT:    s_mov_b32 s3, s5
117; GFX10-NEXT:    s_mov_b32 s4, s6
118; GFX10-NEXT:    s_mov_b32 s5, s7
119; GFX10-NEXT:    s_mov_b32 s6, s8
120; GFX10-NEXT:    s_mov_b32 s7, s9
121; GFX10-NEXT:    ; implicit-def: $vcc_hi
122; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
123; GFX10-NEXT:    s_waitcnt vmcnt(0)
124; GFX10-NEXT:    ; return to shader part epilog
125  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
126  ret half %v
127}
128
; Scalar half image load with dmask = 4.
; Same expected code shape as the other scalar cases; the expected image_load
; carries dmask:0x4.
129define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
130; GFX8-UNPACKED-LABEL: load_1d_f16_z:
131; GFX8-UNPACKED:       ; %bb.0:
132; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
133; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
134; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
135; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
136; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
137; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
138; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
139; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
140; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
141; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
142; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
143;
144; GFX8-PACKED-LABEL: load_1d_f16_z:
145; GFX8-PACKED:       ; %bb.0:
146; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
147; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
148; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
149; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
150; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
151; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
152; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
153; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
154; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
155; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
156; GFX8-PACKED-NEXT:    ; return to shader part epilog
157;
158; GFX9-LABEL: load_1d_f16_z:
159; GFX9:       ; %bb.0:
160; GFX9-NEXT:    s_mov_b32 s0, s2
161; GFX9-NEXT:    s_mov_b32 s1, s3
162; GFX9-NEXT:    s_mov_b32 s2, s4
163; GFX9-NEXT:    s_mov_b32 s3, s5
164; GFX9-NEXT:    s_mov_b32 s4, s6
165; GFX9-NEXT:    s_mov_b32 s5, s7
166; GFX9-NEXT:    s_mov_b32 s6, s8
167; GFX9-NEXT:    s_mov_b32 s7, s9
168; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
169; GFX9-NEXT:    s_waitcnt vmcnt(0)
170; GFX9-NEXT:    ; return to shader part epilog
171;
172; GFX10-LABEL: load_1d_f16_z:
173; GFX10:       ; %bb.0:
174; GFX10-NEXT:    s_mov_b32 s0, s2
175; GFX10-NEXT:    s_mov_b32 s1, s3
176; GFX10-NEXT:    s_mov_b32 s2, s4
177; GFX10-NEXT:    s_mov_b32 s3, s5
178; GFX10-NEXT:    s_mov_b32 s4, s6
179; GFX10-NEXT:    s_mov_b32 s5, s7
180; GFX10-NEXT:    s_mov_b32 s6, s8
181; GFX10-NEXT:    s_mov_b32 s7, s9
182; GFX10-NEXT:    ; implicit-def: $vcc_hi
183; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
184; GFX10-NEXT:    s_waitcnt vmcnt(0)
185; GFX10-NEXT:    ; return to shader part epilog
186  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
187  ret half %v
188}
189
; Scalar half image load with dmask = 8.
; Same expected code shape as the other scalar cases; the expected image_load
; carries dmask:0x8.
190define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
191; GFX8-UNPACKED-LABEL: load_1d_f16_w:
192; GFX8-UNPACKED:       ; %bb.0:
193; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
194; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
195; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
196; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
197; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
198; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
199; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
200; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
201; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
202; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
203; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
204;
205; GFX8-PACKED-LABEL: load_1d_f16_w:
206; GFX8-PACKED:       ; %bb.0:
207; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
208; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
209; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
210; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
211; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
212; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
213; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
214; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
215; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
216; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
217; GFX8-PACKED-NEXT:    ; return to shader part epilog
218;
219; GFX9-LABEL: load_1d_f16_w:
220; GFX9:       ; %bb.0:
221; GFX9-NEXT:    s_mov_b32 s0, s2
222; GFX9-NEXT:    s_mov_b32 s1, s3
223; GFX9-NEXT:    s_mov_b32 s2, s4
224; GFX9-NEXT:    s_mov_b32 s3, s5
225; GFX9-NEXT:    s_mov_b32 s4, s6
226; GFX9-NEXT:    s_mov_b32 s5, s7
227; GFX9-NEXT:    s_mov_b32 s6, s8
228; GFX9-NEXT:    s_mov_b32 s7, s9
229; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
230; GFX9-NEXT:    s_waitcnt vmcnt(0)
231; GFX9-NEXT:    ; return to shader part epilog
232;
233; GFX10-LABEL: load_1d_f16_w:
234; GFX10:       ; %bb.0:
235; GFX10-NEXT:    s_mov_b32 s0, s2
236; GFX10-NEXT:    s_mov_b32 s1, s3
237; GFX10-NEXT:    s_mov_b32 s2, s4
238; GFX10-NEXT:    s_mov_b32 s3, s5
239; GFX10-NEXT:    s_mov_b32 s4, s6
240; GFX10-NEXT:    s_mov_b32 s5, s7
241; GFX10-NEXT:    s_mov_b32 s6, s8
242; GFX10-NEXT:    s_mov_b32 s7, s9
243; GFX10-NEXT:    ; implicit-def: $vcc_hi
244; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
245; GFX10-NEXT:    s_waitcnt vmcnt(0)
246; GFX10-NEXT:    ; return to shader part epilog
247  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
248  ret half %v
249}
250
; <2 x half> image load with dmask = 3.
; The tonga run expects the load to fill v[0:1] and then an and/shift/or
; sequence that combines both registers into v0; the gfx810, gfx900 and
; gfx1010 runs expect the result directly in v0 with no extra ALU code.
251define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
252; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
253; GFX8-UNPACKED:       ; %bb.0:
254; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
255; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
256; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
257; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
258; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
259; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
260; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
261; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
262; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
263; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
264; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
265; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
266; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
267; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
268;
269; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
270; GFX8-PACKED:       ; %bb.0:
271; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
272; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
273; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
274; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
275; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
276; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
277; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
278; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
279; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
280; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
281; GFX8-PACKED-NEXT:    ; return to shader part epilog
282;
283; GFX9-LABEL: load_1d_v2f16_xy:
284; GFX9:       ; %bb.0:
285; GFX9-NEXT:    s_mov_b32 s0, s2
286; GFX9-NEXT:    s_mov_b32 s1, s3
287; GFX9-NEXT:    s_mov_b32 s2, s4
288; GFX9-NEXT:    s_mov_b32 s3, s5
289; GFX9-NEXT:    s_mov_b32 s4, s6
290; GFX9-NEXT:    s_mov_b32 s5, s7
291; GFX9-NEXT:    s_mov_b32 s6, s8
292; GFX9-NEXT:    s_mov_b32 s7, s9
293; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
294; GFX9-NEXT:    s_waitcnt vmcnt(0)
295; GFX9-NEXT:    ; return to shader part epilog
296;
297; GFX10-LABEL: load_1d_v2f16_xy:
298; GFX10:       ; %bb.0:
299; GFX10-NEXT:    s_mov_b32 s0, s2
300; GFX10-NEXT:    s_mov_b32 s1, s3
301; GFX10-NEXT:    s_mov_b32 s2, s4
302; GFX10-NEXT:    s_mov_b32 s3, s5
303; GFX10-NEXT:    s_mov_b32 s4, s6
304; GFX10-NEXT:    s_mov_b32 s5, s7
305; GFX10-NEXT:    s_mov_b32 s6, s8
306; GFX10-NEXT:    s_mov_b32 s7, s9
307; GFX10-NEXT:    ; implicit-def: $vcc_hi
308; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
309; GFX10-NEXT:    s_waitcnt vmcnt(0)
310; GFX10-NEXT:    ; return to shader part epilog
311  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
312  ret <2 x half> %v
313}
314
; <2 x half> image load with a non-contiguous dmask = 5.
; Expected code has the same shape as the dmask = 3 case: the tonga run loads
; v[0:1] and combines the two registers into v0, the other runs load v0 only.
315define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
316; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
317; GFX8-UNPACKED:       ; %bb.0:
318; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
319; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
320; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
321; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
322; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
323; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
324; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
325; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
326; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
327; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
328; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
329; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
330; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
331; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
332;
333; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
334; GFX8-PACKED:       ; %bb.0:
335; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
336; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
337; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
338; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
339; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
340; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
341; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
342; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
343; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
344; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
345; GFX8-PACKED-NEXT:    ; return to shader part epilog
346;
347; GFX9-LABEL: load_1d_v2f16_xz:
348; GFX9:       ; %bb.0:
349; GFX9-NEXT:    s_mov_b32 s0, s2
350; GFX9-NEXT:    s_mov_b32 s1, s3
351; GFX9-NEXT:    s_mov_b32 s2, s4
352; GFX9-NEXT:    s_mov_b32 s3, s5
353; GFX9-NEXT:    s_mov_b32 s4, s6
354; GFX9-NEXT:    s_mov_b32 s5, s7
355; GFX9-NEXT:    s_mov_b32 s6, s8
356; GFX9-NEXT:    s_mov_b32 s7, s9
357; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
358; GFX9-NEXT:    s_waitcnt vmcnt(0)
359; GFX9-NEXT:    ; return to shader part epilog
360;
361; GFX10-LABEL: load_1d_v2f16_xz:
362; GFX10:       ; %bb.0:
363; GFX10-NEXT:    s_mov_b32 s0, s2
364; GFX10-NEXT:    s_mov_b32 s1, s3
365; GFX10-NEXT:    s_mov_b32 s2, s4
366; GFX10-NEXT:    s_mov_b32 s3, s5
367; GFX10-NEXT:    s_mov_b32 s4, s6
368; GFX10-NEXT:    s_mov_b32 s5, s7
369; GFX10-NEXT:    s_mov_b32 s6, s8
370; GFX10-NEXT:    s_mov_b32 s7, s9
371; GFX10-NEXT:    ; implicit-def: $vcc_hi
372; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
373; GFX10-NEXT:    s_waitcnt vmcnt(0)
374; GFX10-NEXT:    ; return to shader part epilog
375  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
376  ret <2 x half> %v
377}
378
; <2 x half> image load with a non-contiguous dmask = 9.
; Expected code has the same shape as the other v2f16 cases: the tonga run
; loads v[0:1] and combines the two registers into v0, the other runs load v0.
379define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
380; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
381; GFX8-UNPACKED:       ; %bb.0:
382; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
383; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
384; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
385; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
386; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
387; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
388; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
389; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
390; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
391; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
392; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
393; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
394; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
395; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
396;
397; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
398; GFX8-PACKED:       ; %bb.0:
399; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
400; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
401; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
402; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
403; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
404; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
405; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
406; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
407; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
408; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
409; GFX8-PACKED-NEXT:    ; return to shader part epilog
410;
411; GFX9-LABEL: load_1d_v2f16_xw:
412; GFX9:       ; %bb.0:
413; GFX9-NEXT:    s_mov_b32 s0, s2
414; GFX9-NEXT:    s_mov_b32 s1, s3
415; GFX9-NEXT:    s_mov_b32 s2, s4
416; GFX9-NEXT:    s_mov_b32 s3, s5
417; GFX9-NEXT:    s_mov_b32 s4, s6
418; GFX9-NEXT:    s_mov_b32 s5, s7
419; GFX9-NEXT:    s_mov_b32 s6, s8
420; GFX9-NEXT:    s_mov_b32 s7, s9
421; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
422; GFX9-NEXT:    s_waitcnt vmcnt(0)
423; GFX9-NEXT:    ; return to shader part epilog
424;
425; GFX10-LABEL: load_1d_v2f16_xw:
426; GFX10:       ; %bb.0:
427; GFX10-NEXT:    s_mov_b32 s0, s2
428; GFX10-NEXT:    s_mov_b32 s1, s3
429; GFX10-NEXT:    s_mov_b32 s2, s4
430; GFX10-NEXT:    s_mov_b32 s3, s5
431; GFX10-NEXT:    s_mov_b32 s4, s6
432; GFX10-NEXT:    s_mov_b32 s5, s7
433; GFX10-NEXT:    s_mov_b32 s6, s8
434; GFX10-NEXT:    s_mov_b32 s7, s9
435; GFX10-NEXT:    ; implicit-def: $vcc_hi
436; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
437; GFX10-NEXT:    s_waitcnt vmcnt(0)
438; GFX10-NEXT:    ; return to shader part epilog
439  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
440  ret <2 x half> %v
441}
442
; <2 x half> image load with dmask = 6 (lowest dmask bit clear).
; Expected code has the same shape as the other v2f16 cases: the tonga run
; loads v[0:1] and combines the two registers into v0, the other runs load v0.
443define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
444; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
445; GFX8-UNPACKED:       ; %bb.0:
446; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
447; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
448; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
449; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
450; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
451; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
452; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
453; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
454; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
455; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
456; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
457; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
458; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
459; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
460;
461; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
462; GFX8-PACKED:       ; %bb.0:
463; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
464; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
465; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
466; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
467; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
468; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
469; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
470; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
471; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
472; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
473; GFX8-PACKED-NEXT:    ; return to shader part epilog
474;
475; GFX9-LABEL: load_1d_v2f16_yz:
476; GFX9:       ; %bb.0:
477; GFX9-NEXT:    s_mov_b32 s0, s2
478; GFX9-NEXT:    s_mov_b32 s1, s3
479; GFX9-NEXT:    s_mov_b32 s2, s4
480; GFX9-NEXT:    s_mov_b32 s3, s5
481; GFX9-NEXT:    s_mov_b32 s4, s6
482; GFX9-NEXT:    s_mov_b32 s5, s7
483; GFX9-NEXT:    s_mov_b32 s6, s8
484; GFX9-NEXT:    s_mov_b32 s7, s9
485; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
486; GFX9-NEXT:    s_waitcnt vmcnt(0)
487; GFX9-NEXT:    ; return to shader part epilog
488;
489; GFX10-LABEL: load_1d_v2f16_yz:
490; GFX10:       ; %bb.0:
491; GFX10-NEXT:    s_mov_b32 s0, s2
492; GFX10-NEXT:    s_mov_b32 s1, s3
493; GFX10-NEXT:    s_mov_b32 s2, s4
494; GFX10-NEXT:    s_mov_b32 s3, s5
495; GFX10-NEXT:    s_mov_b32 s4, s6
496; GFX10-NEXT:    s_mov_b32 s5, s7
497; GFX10-NEXT:    s_mov_b32 s6, s8
498; GFX10-NEXT:    s_mov_b32 s7, s9
499; GFX10-NEXT:    ; implicit-def: $vcc_hi
500; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
501; GFX10-NEXT:    s_waitcnt vmcnt(0)
502; GFX10-NEXT:    ; return to shader part epilog
503  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
504  ret <2 x half> %v
505}
506
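507; FIXME: The <3 x half> case below is commented out (presumably not yet handled with -global-isel); re-enable it once it compiles.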
508; define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
509;   %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
510;   ret <3 x half> %v
511; }
512
; <4 x half> image load with dmask = 15.
; The tonga run expects the load to fill v[0:3] followed by and/shift/or
; sequences that combine the four registers into v0 and v1; the gfx810,
; gfx900 and gfx1010 runs expect the result directly in v[0:1].
513define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
514; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
515; GFX8-UNPACKED:       ; %bb.0:
516; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
517; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
518; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
519; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
520; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
521; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
522; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
523; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
524; GFX8-UNPACKED-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
525; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, 0xffff
526; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
527; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, s0, v1
528; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v3, s0, v3
529; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
530; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
531; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
532; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
533; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
534;
535; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
536; GFX8-PACKED:       ; %bb.0:
537; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
538; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
539; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
540; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
541; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
542; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
543; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
544; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
545; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
546; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
547; GFX8-PACKED-NEXT:    ; return to shader part epilog
548;
549; GFX9-LABEL: load_1d_v4f16_xyzw:
550; GFX9:       ; %bb.0:
551; GFX9-NEXT:    s_mov_b32 s0, s2
552; GFX9-NEXT:    s_mov_b32 s1, s3
553; GFX9-NEXT:    s_mov_b32 s2, s4
554; GFX9-NEXT:    s_mov_b32 s3, s5
555; GFX9-NEXT:    s_mov_b32 s4, s6
556; GFX9-NEXT:    s_mov_b32 s5, s7
557; GFX9-NEXT:    s_mov_b32 s6, s8
558; GFX9-NEXT:    s_mov_b32 s7, s9
559; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
560; GFX9-NEXT:    s_waitcnt vmcnt(0)
561; GFX9-NEXT:    ; return to shader part epilog
562;
563; GFX10-LABEL: load_1d_v4f16_xyzw:
564; GFX10:       ; %bb.0:
565; GFX10-NEXT:    s_mov_b32 s0, s2
566; GFX10-NEXT:    s_mov_b32 s1, s3
567; GFX10-NEXT:    s_mov_b32 s2, s4
568; GFX10-NEXT:    s_mov_b32 s3, s5
569; GFX10-NEXT:    s_mov_b32 s4, s6
570; GFX10-NEXT:    s_mov_b32 s5, s7
571; GFX10-NEXT:    s_mov_b32 s6, s8
572; GFX10-NEXT:    s_mov_b32 s7, s9
573; GFX10-NEXT:    ; implicit-def: $vcc_hi
574; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
575; GFX10-NEXT:    s_waitcnt vmcnt(0)
576; GFX10-NEXT:    ; return to shader part epilog
577  %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
578  ret <4 x half> %v
579}
580
; Struct-returning { half, i32 } image load with dmask = 1 and the fourth
; intrinsic argument set to 1; the expected image_load carries the tfe modifier.
; Only the i32 member (struct index 1) is used: it is bitcast to float and
; returned, so every run expects the load into v[0:1] followed by a copy of v1
; into v0.
581define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
582; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
583; GFX8-UNPACKED:       ; %bb.0:
584; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
585; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
586; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
587; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
588; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
589; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
590; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
591; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
592; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
593; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
594; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1
595; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
596;
597; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
598; GFX8-PACKED:       ; %bb.0:
599; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
600; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
601; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
602; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
603; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
604; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
605; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
606; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
607; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
608; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
609; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
610; GFX8-PACKED-NEXT:    ; return to shader part epilog
611;
612; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
613; GFX9:       ; %bb.0:
614; GFX9-NEXT:    s_mov_b32 s0, s2
615; GFX9-NEXT:    s_mov_b32 s1, s3
616; GFX9-NEXT:    s_mov_b32 s2, s4
617; GFX9-NEXT:    s_mov_b32 s3, s5
618; GFX9-NEXT:    s_mov_b32 s4, s6
619; GFX9-NEXT:    s_mov_b32 s5, s7
620; GFX9-NEXT:    s_mov_b32 s6, s8
621; GFX9-NEXT:    s_mov_b32 s7, s9
622; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16
623; GFX9-NEXT:    s_waitcnt vmcnt(0)
624; GFX9-NEXT:    v_mov_b32_e32 v0, v1
625; GFX9-NEXT:    ; return to shader part epilog
626;
627; GFX10-LABEL: load_1d_f16_tfe_dmask_x:
628; GFX10:       ; %bb.0:
629; GFX10-NEXT:    s_mov_b32 s0, s2
630; GFX10-NEXT:    s_mov_b32 s1, s3
631; GFX10-NEXT:    s_mov_b32 s2, s4
632; GFX10-NEXT:    s_mov_b32 s3, s5
633; GFX10-NEXT:    s_mov_b32 s4, s6
634; GFX10-NEXT:    s_mov_b32 s5, s7
635; GFX10-NEXT:    s_mov_b32 s6, s8
636; GFX10-NEXT:    s_mov_b32 s7, s9
637; GFX10-NEXT:    ; implicit-def: $vcc_hi
638; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
639; GFX10-NEXT:    s_waitcnt vmcnt(0)
640; GFX10-NEXT:    v_mov_b32_e32 v0, v1
641; GFX10-NEXT:    ; return to shader part epilog
642  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
643  %v.err = extractvalue { half, i32 } %v, 1
644  %vv = bitcast i32 %v.err to float
645  ret float %vv
646}
647
; Struct-returning { <2 x half>, i32 } image load with dmask = 3 and the
; fourth intrinsic argument set to 1 (expected image_load carries tfe).
; Only the i32 member (struct index 1) is returned, bitcast to float.
; The tonga run expects the load into v[0:2] and a copy from v2; the other
; runs expect the load into v[0:1] and a copy from v1.
648define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
649; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
650; GFX8-UNPACKED:       ; %bb.0:
651; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
652; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
653; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
654; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
655; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
656; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
657; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
658; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
659; GFX8-UNPACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 unorm tfe d16
660; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
661; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
662; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
663;
664; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
665; GFX8-PACKED:       ; %bb.0:
666; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
667; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
668; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
669; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
670; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
671; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
672; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
673; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
674; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
675; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
676; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
677; GFX8-PACKED-NEXT:    ; return to shader part epilog
678;
679; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
680; GFX9:       ; %bb.0:
681; GFX9-NEXT:    s_mov_b32 s0, s2
682; GFX9-NEXT:    s_mov_b32 s1, s3
683; GFX9-NEXT:    s_mov_b32 s2, s4
684; GFX9-NEXT:    s_mov_b32 s3, s5
685; GFX9-NEXT:    s_mov_b32 s4, s6
686; GFX9-NEXT:    s_mov_b32 s5, s7
687; GFX9-NEXT:    s_mov_b32 s6, s8
688; GFX9-NEXT:    s_mov_b32 s7, s9
689; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16
690; GFX9-NEXT:    s_waitcnt vmcnt(0)
691; GFX9-NEXT:    v_mov_b32_e32 v0, v1
692; GFX9-NEXT:    ; return to shader part epilog
693;
694; GFX10-LABEL: load_1d_v2f16_tfe_dmask_xy:
695; GFX10:       ; %bb.0:
696; GFX10-NEXT:    s_mov_b32 s0, s2
697; GFX10-NEXT:    s_mov_b32 s1, s3
698; GFX10-NEXT:    s_mov_b32 s2, s4
699; GFX10-NEXT:    s_mov_b32 s3, s5
700; GFX10-NEXT:    s_mov_b32 s4, s6
701; GFX10-NEXT:    s_mov_b32 s5, s7
702; GFX10-NEXT:    s_mov_b32 s6, s8
703; GFX10-NEXT:    s_mov_b32 s7, s9
704; GFX10-NEXT:    ; implicit-def: $vcc_hi
705; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
706; GFX10-NEXT:    s_waitcnt vmcnt(0)
707; GFX10-NEXT:    v_mov_b32_e32 v0, v1
708; GFX10-NEXT:    ; return to shader part epilog
709  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
710  %v.err = extractvalue { <2 x half>, i32 } %v, 1
711  %vv = bitcast i32 %v.err to float
712  ret float %vv
713}
714
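715; FIXME: The <3 x half> TFE case below is commented out (presumably not yet handled with -global-isel); re-enable it once it compiles.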
716; define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
717;   %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
718;   %v.err = extractvalue { <3 x half>, i32 } %v, 1
719;   %vv = bitcast i32 %v.err to float
720;   ret float %vv
721; }
722
; Struct-returning { <4 x half>, i32 } image load with the fourth intrinsic
; argument set to 1 (expected image_load carries tfe). Only the i32 member
; (struct index 1) is returned, bitcast to float. Every run currently expects
; a two-register load into v[0:1] with dmask:0x10 and a copy from v1.
; NOTE(review): the dmask argument is 16 (0x10), whereas load_1d_v4f16_xyzw
; passes 15 (0xf) for the xyzw selection this function's name also implies;
; 0x10 sets none of the low four bits. Confirm whether 15 was intended. If the
; value is changed, the assertions below must be regenerated with
; utils/update_llc_test_checks.py.
723define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
724; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
725; GFX8-UNPACKED:       ; %bb.0:
726; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
727; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
728; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
729; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
730; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
731; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
732; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
733; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
734; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
735; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
736; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1
737; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
738;
739; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
740; GFX8-PACKED:       ; %bb.0:
741; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
742; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
743; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
744; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
745; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
746; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
747; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
748; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
749; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
750; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
751; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v1
752; GFX8-PACKED-NEXT:    ; return to shader part epilog
753;
754; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
755; GFX9:       ; %bb.0:
756; GFX9-NEXT:    s_mov_b32 s0, s2
757; GFX9-NEXT:    s_mov_b32 s1, s3
758; GFX9-NEXT:    s_mov_b32 s2, s4
759; GFX9-NEXT:    s_mov_b32 s3, s5
760; GFX9-NEXT:    s_mov_b32 s4, s6
761; GFX9-NEXT:    s_mov_b32 s5, s7
762; GFX9-NEXT:    s_mov_b32 s6, s8
763; GFX9-NEXT:    s_mov_b32 s7, s9
764; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16
765; GFX9-NEXT:    s_waitcnt vmcnt(0)
766; GFX9-NEXT:    v_mov_b32_e32 v0, v1
767; GFX9-NEXT:    ; return to shader part epilog
768;
769; GFX10-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
770; GFX10:       ; %bb.0:
771; GFX10-NEXT:    s_mov_b32 s0, s2
772; GFX10-NEXT:    s_mov_b32 s1, s3
773; GFX10-NEXT:    s_mov_b32 s2, s4
774; GFX10-NEXT:    s_mov_b32 s3, s5
775; GFX10-NEXT:    s_mov_b32 s4, s6
776; GFX10-NEXT:    s_mov_b32 s5, s7
777; GFX10-NEXT:    s_mov_b32 s6, s8
778; GFX10-NEXT:    s_mov_b32 s7, s9
779; GFX10-NEXT:    ; implicit-def: $vcc_hi
780; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16
781; GFX10-NEXT:    s_waitcnt vmcnt(0)
782; GFX10-NEXT:    v_mov_b32_e32 v0, v1
783; GFX10-NEXT:    ; return to shader part epilog
784  %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
785  %v.err = extractvalue { <4 x half>, i32 } %v, 1
786  %vv = bitcast i32 %v.err to float
787  ret float %vv
788}
789
; Declarations of the 1D image-load intrinsics called by the tests in this
; file. The first group returns the loaded value directly; the v3f16 variant
; is referenced only by a commented-out FIXME test.
790declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
791declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
792declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
793declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
794
; Struct-returning variants: each returns the loaded value together with an
; extra i32 member, which the *_tfe_* tests extract and return.
795declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
796declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
797declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
798declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
799
; Attribute group applied to every intrinsic declaration above.
800attributes #0 = { nounwind readonly }
801