1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-SI %s
3; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-HSA %s
4; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-VI %s
5; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=EG %s
6; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=CM %s
7
8; FIXME: r600 is broken because the bigger test cases spill, and spilling is not implemented
9
; Scalar case: load one i16 through the addrspace(1) pointer %in and store it
; unchanged through the addrspace(1) pointer %out.
;
; What the generated checks below pin down:
;  * the three GCN runs expect exactly one 16-bit load (buffer_load_ushort, or
;    flat_load_ushort for the amdhsa/kaveri run) and one 16-bit store
;    (buffer_store_short / flat_store_short);
;  * the two r600 runs (redwood and cayman) expect a single VTX_READ_16 fetch
;    and a MEM_RAT MSKOR store, with the stored value and a 65535 mask both
;    shifted left by an amount computed from the low two bits of the output
;    address (AND with 3, then LSHL by 3).
;
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
10define amdgpu_kernel void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
11; GCN-NOHSA-SI-LABEL: global_load_i16:
12; GCN-NOHSA-SI:       ; %bb.0: ; %entry
13; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
14; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
15; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
16; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
17; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
18; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
19; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
20; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
21; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
22; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
23; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
24; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
25; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
26; GCN-NOHSA-SI-NEXT:    s_endpgm
27;
28; GCN-HSA-LABEL: global_load_i16:
29; GCN-HSA:       ; %bb.0: ; %entry
30; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
31; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
32; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
33; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
34; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
35; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
36; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
37; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
38; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
39; GCN-HSA-NEXT:    s_endpgm
40;
41; GCN-NOHSA-VI-LABEL: global_load_i16:
42; GCN-NOHSA-VI:       ; %bb.0: ; %entry
43; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
44; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
45; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
46; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
47; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
48; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
49; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
50; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
51; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
52; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
53; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
54; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
55; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
56; GCN-NOHSA-VI-NEXT:    s_endpgm
57;
58; EG-LABEL: global_load_i16:
59; EG:       ; %bb.0: ; %entry
60; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
61; EG-NEXT:    TEX 0 @6
62; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
63; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
64; EG-NEXT:    CF_END
65; EG-NEXT:    PAD
66; EG-NEXT:    Fetch clause starting at 6:
67; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
68; EG-NEXT:    ALU clause starting at 8:
69; EG-NEXT:     MOV * T0.X, KC0[2].Z,
70; EG-NEXT:    ALU clause starting at 9:
71; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
72; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
73; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
74; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
75; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
76; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
77; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
78; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
79; EG-NEXT:     MOV T0.Y, 0.0,
80; EG-NEXT:     MOV * T0.Z, 0.0,
81; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
82; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
83;
84; CM-LABEL: global_load_i16:
85; CM:       ; %bb.0: ; %entry
86; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
87; CM-NEXT:    TEX 0 @6
88; CM-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
89; CM-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
90; CM-NEXT:    CF_END
91; CM-NEXT:    PAD
92; CM-NEXT:    Fetch clause starting at 6:
93; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
94; CM-NEXT:    ALU clause starting at 8:
95; CM-NEXT:     MOV * T0.X, KC0[2].Z,
96; CM-NEXT:    ALU clause starting at 9:
97; CM-NEXT:     AND_INT * T0.W, KC0[2].Y, literal.x,
98; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
99; CM-NEXT:     AND_INT T0.Z, T0.X, literal.x,
100; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
101; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
102; CM-NEXT:     LSHL T0.X, PV.Z, PV.W,
103; CM-NEXT:     LSHL * T0.W, literal.x, PV.W,
104; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
105; CM-NEXT:     MOV T0.Y, 0.0,
106; CM-NEXT:     MOV * T0.Z, 0.0,
107; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
108; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
109entry:
110  %ld = load i16, i16 addrspace(1)* %in
111  store i16 %ld, i16 addrspace(1)* %out
112  ret void
113}
114
; <2 x i16> case: load the whole vector through the addrspace(1) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
;
; What the generated checks below pin down: every run expects the two
; elements to move as one 32-bit access, with no per-element splitting:
;  * GCN runs: buffer_load_dword / buffer_store_dword, or flat_load_dword /
;    flat_store_dword for the amdhsa/kaveri run;
;  * r600 runs: one VTX_READ_32 fetch and one MEM_RAT_CACHELESS store
;    (STORE_RAW for redwood, STORE_DWORD for cayman).
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
115define amdgpu_kernel void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
116; GCN-NOHSA-SI-LABEL: global_load_v2i16:
117; GCN-NOHSA-SI:       ; %bb.0: ; %entry
118; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
119; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
120; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
121; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
122; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
123; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
124; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
125; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
126; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
127; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
128; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
129; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
130; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
131; GCN-NOHSA-SI-NEXT:    s_endpgm
132;
133; GCN-HSA-LABEL: global_load_v2i16:
134; GCN-HSA:       ; %bb.0: ; %entry
135; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
136; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
137; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
138; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
139; GCN-HSA-NEXT:    flat_load_dword v2, v[2:3]
140; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
141; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
142; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
143; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
144; GCN-HSA-NEXT:    s_endpgm
145;
146; GCN-NOHSA-VI-LABEL: global_load_v2i16:
147; GCN-NOHSA-VI:       ; %bb.0: ; %entry
148; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
149; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
150; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
151; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
152; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
153; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
154; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
155; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
156; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
157; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
158; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
159; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
160; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
161; GCN-NOHSA-VI-NEXT:    s_endpgm
162;
163; EG-LABEL: global_load_v2i16:
164; EG:       ; %bb.0: ; %entry
165; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
166; EG-NEXT:    TEX 0 @6
167; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
168; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
169; EG-NEXT:    CF_END
170; EG-NEXT:    PAD
171; EG-NEXT:    Fetch clause starting at 6:
172; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
173; EG-NEXT:    ALU clause starting at 8:
174; EG-NEXT:     MOV * T0.X, KC0[2].Z,
175; EG-NEXT:    ALU clause starting at 9:
176; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
177; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
178;
179; CM-LABEL: global_load_v2i16:
180; CM:       ; %bb.0: ; %entry
181; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
182; CM-NEXT:    TEX 0 @6
183; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
184; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
185; CM-NEXT:    CF_END
186; CM-NEXT:    PAD
187; CM-NEXT:    Fetch clause starting at 6:
188; CM-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
189; CM-NEXT:    ALU clause starting at 8:
190; CM-NEXT:     MOV * T0.X, KC0[2].Z,
191; CM-NEXT:    ALU clause starting at 9:
192; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
193; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
194entry:
195  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
196  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
197  ret void
198}
199
; <3 x i16> case (a vector whose element count is not a power of two): load
; the whole vector through the addrspace(1) pointer %in and store it unchanged
; through the addrspace(1) pointer %out.
;
; What the generated checks below pin down:
;  * GCN runs: the load is widened to a single dwordx2 access, while the
;    store is split into a dword for the first two elements plus a 16-bit
;    store for the third element at offset 4 (the amdhsa/kaveri run forms
;    that address with s_add_u32/s_addc_u32 instead of an immediate offset);
;  * r600 runs: three separate VTX_READ_16 fetches at offsets 0, 2 and 4; the
;    first two values are combined with LSHL by 16 and OR_INT and written
;    with a MEM_RAT_CACHELESS store, and the third is written with a
;    MEM_RAT MSKOR store. The two runs emit those stores in opposite order.
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
200define amdgpu_kernel void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
201; GCN-NOHSA-SI-LABEL: global_load_v3i16:
202; GCN-NOHSA-SI:       ; %bb.0: ; %entry
203; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
204; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
205; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
206; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
207; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
208; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
209; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
210; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
211; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
212; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
213; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
214; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
215; GCN-NOHSA-SI-NEXT:    buffer_store_short v1, off, s[4:7], 0 offset:4
216; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
217; GCN-NOHSA-SI-NEXT:    s_endpgm
218;
219; GCN-HSA-LABEL: global_load_v3i16:
220; GCN-HSA:       ; %bb.0: ; %entry
221; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
222; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
223; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
224; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
225; GCN-HSA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
226; GCN-HSA-NEXT:    s_add_u32 s2, s0, 4
227; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
228; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
229; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
230; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
231; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
232; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
233; GCN-HSA-NEXT:    flat_store_short v[4:5], v1
234; GCN-HSA-NEXT:    flat_store_dword v[2:3], v0
235; GCN-HSA-NEXT:    s_endpgm
236;
237; GCN-NOHSA-VI-LABEL: global_load_v3i16:
238; GCN-NOHSA-VI:       ; %bb.0: ; %entry
239; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
240; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
241; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
242; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
243; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
244; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
245; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
246; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
247; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
248; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
249; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
250; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
251; GCN-NOHSA-VI-NEXT:    buffer_store_short v1, off, s[0:3], 0 offset:4
252; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
253; GCN-NOHSA-VI-NEXT:    s_endpgm
254;
255; EG-LABEL: global_load_v3i16:
256; EG:       ; %bb.0: ; %entry
257; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
258; EG-NEXT:    TEX 2 @6
259; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
260; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
261; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
262; EG-NEXT:    CF_END
263; EG-NEXT:    Fetch clause starting at 6:
264; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
265; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
266; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
267; EG-NEXT:    ALU clause starting at 12:
268; EG-NEXT:     MOV * T5.X, KC0[2].Z,
269; EG-NEXT:    ALU clause starting at 13:
270; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
271; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
272; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
273; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
274; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
275; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
276; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
277; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
278; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
279; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
280; EG-NEXT:     MOV T5.Y, 0.0,
281; EG-NEXT:     MOV * T5.Z, 0.0,
282; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
283; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
284; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
285; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
286; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
287; EG-NEXT:     OR_INT T6.X, PV.W, PS,
288; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
289; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
290;
291; CM-LABEL: global_load_v3i16:
292; CM:       ; %bb.0: ; %entry
293; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
294; CM-NEXT:    TEX 2 @6
295; CM-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
296; CM-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
297; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
298; CM-NEXT:    CF_END
299; CM-NEXT:    Fetch clause starting at 6:
300; CM-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
301; CM-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
302; CM-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
303; CM-NEXT:    ALU clause starting at 12:
304; CM-NEXT:     MOV * T5.X, KC0[2].Z,
305; CM-NEXT:    ALU clause starting at 13:
306; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
307; CM-NEXT:    4(5.605194e-45), 0(0.000000e+00)
308; CM-NEXT:     AND_INT * T1.W, PV.W, literal.x,
309; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
310; CM-NEXT:     AND_INT T0.Z, T5.X, literal.x,
311; CM-NEXT:     LSHL * T1.W, PV.W, literal.y,
312; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
313; CM-NEXT:     LSHL T5.X, PV.Z, PV.W,
314; CM-NEXT:     LSHL * T5.W, literal.x, PV.W,
315; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
316; CM-NEXT:     MOV T5.Y, 0.0,
317; CM-NEXT:     MOV * T5.Z, 0.0,
318; CM-NEXT:     LSHL T0.Z, T7.X, literal.x,
319; CM-NEXT:     AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
320; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
321; CM-NEXT:     OR_INT * T6.X, PV.Z, PV.W,
322; CM-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
323; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
324; CM-NEXT:     LSHR * T8.X, T0.W, literal.x,
325; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
326entry:
327  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
328  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
329  ret void
330}
331
; <4 x i16> case: load the whole vector through the addrspace(1) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
;
; What the generated checks below pin down: every run expects the four
; elements to move as one 64-bit access:
;  * GCN runs: buffer_load_dwordx2 / buffer_store_dwordx2, or
;    flat_load_dwordx2 / flat_store_dwordx2 for the amdhsa/kaveri run;
;  * r600 runs: one VTX_READ_64 fetch and one MEM_RAT_CACHELESS store
;    (STORE_RAW for redwood, STORE_DWORD for cayman).
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
332define amdgpu_kernel void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
333; GCN-NOHSA-SI-LABEL: global_load_v4i16:
334; GCN-NOHSA-SI:       ; %bb.0: ; %entry
335; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
336; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
337; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
338; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
339; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
340; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
341; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
342; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
343; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
344; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
345; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
346; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
347; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
348; GCN-NOHSA-SI-NEXT:    s_endpgm
349;
350; GCN-HSA-LABEL: global_load_v4i16:
351; GCN-HSA:       ; %bb.0: ; %entry
352; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
353; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
354; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
355; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
356; GCN-HSA-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
357; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
358; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
359; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
360; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
361; GCN-HSA-NEXT:    s_endpgm
362;
363; GCN-NOHSA-VI-LABEL: global_load_v4i16:
364; GCN-NOHSA-VI:       ; %bb.0: ; %entry
365; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
366; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
367; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
368; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
369; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
370; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
371; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
372; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
373; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
374; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
375; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
376; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
377; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
378; GCN-NOHSA-VI-NEXT:    s_endpgm
379;
380; EG-LABEL: global_load_v4i16:
381; EG:       ; %bb.0: ; %entry
382; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
383; EG-NEXT:    TEX 0 @6
384; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
385; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
386; EG-NEXT:    CF_END
387; EG-NEXT:    PAD
388; EG-NEXT:    Fetch clause starting at 6:
389; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
390; EG-NEXT:    ALU clause starting at 8:
391; EG-NEXT:     MOV * T0.X, KC0[2].Z,
392; EG-NEXT:    ALU clause starting at 9:
393; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
394; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
395;
396; CM-LABEL: global_load_v4i16:
397; CM:       ; %bb.0: ; %entry
398; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
399; CM-NEXT:    TEX 0 @6
400; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
401; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
402; CM-NEXT:    CF_END
403; CM-NEXT:    PAD
404; CM-NEXT:    Fetch clause starting at 6:
405; CM-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
406; CM-NEXT:    ALU clause starting at 8:
407; CM-NEXT:     MOV * T0.X, KC0[2].Z,
408; CM-NEXT:    ALU clause starting at 9:
409; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
410; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
411entry:
412  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
413  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
414  ret void
415}
416
; <8 x i16> case: load the whole vector through the addrspace(1) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
;
; What the generated checks below pin down: every run expects the eight
; elements to move as one 128-bit access:
;  * GCN runs: buffer_load_dwordx4 / buffer_store_dwordx4, or
;    flat_load_dwordx4 / flat_store_dwordx4 for the amdhsa/kaveri run;
;  * r600 runs: one VTX_READ_128 fetch and one MEM_RAT_CACHELESS store
;    (STORE_RAW for redwood, STORE_DWORD for cayman).
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
417define amdgpu_kernel void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
418; GCN-NOHSA-SI-LABEL: global_load_v8i16:
419; GCN-NOHSA-SI:       ; %bb.0: ; %entry
420; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
421; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
422; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
423; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
424; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
425; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
426; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
427; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
428; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
429; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
430; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
431; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
432; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
433; GCN-NOHSA-SI-NEXT:    s_endpgm
434;
435; GCN-HSA-LABEL: global_load_v8i16:
436; GCN-HSA:       ; %bb.0: ; %entry
437; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
438; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
439; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
440; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
441; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
442; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
443; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
444; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
445; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
446; GCN-HSA-NEXT:    s_endpgm
447;
448; GCN-NOHSA-VI-LABEL: global_load_v8i16:
449; GCN-NOHSA-VI:       ; %bb.0: ; %entry
450; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
451; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
452; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
453; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
454; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
455; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
456; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
457; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
458; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
459; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
460; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
461; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
462; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
463; GCN-NOHSA-VI-NEXT:    s_endpgm
464;
465; EG-LABEL: global_load_v8i16:
466; EG:       ; %bb.0: ; %entry
467; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
468; EG-NEXT:    TEX 0 @6
469; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
470; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
471; EG-NEXT:    CF_END
472; EG-NEXT:    PAD
473; EG-NEXT:    Fetch clause starting at 6:
474; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
475; EG-NEXT:    ALU clause starting at 8:
476; EG-NEXT:     MOV * T0.X, KC0[2].Z,
477; EG-NEXT:    ALU clause starting at 9:
478; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
479; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
480;
481; CM-LABEL: global_load_v8i16:
482; CM:       ; %bb.0: ; %entry
483; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
484; CM-NEXT:    TEX 0 @6
485; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
486; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
487; CM-NEXT:    CF_END
488; CM-NEXT:    PAD
489; CM-NEXT:    Fetch clause starting at 6:
490; CM-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
491; CM-NEXT:    ALU clause starting at 8:
492; CM-NEXT:     MOV * T0.X, KC0[2].Z,
493; CM-NEXT:    ALU clause starting at 9:
494; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
495; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
496entry:
497  %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in
498  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
499  ret void
500}
501
; <16 x i16> case: load the whole vector through the addrspace(1) pointer %in
; and store it unchanged through the addrspace(1) pointer %out.
;
; What the generated checks below pin down: every run expects the access to
; be split into two 128-bit halves, at offsets 0 and 16:
;  * GCN runs: two dwordx4 loads followed by two dwordx4 stores. The buffer
;    forms encode the second half with an immediate offset of 16, while the
;    amdhsa/kaveri run forms both second-half addresses with
;    s_add_u32/s_addc_u32. The SI run waits with vmcnt(1) before each store;
;    the other two GCN runs wait once with vmcnt(0);
;  * r600 runs: two VTX_READ_128 fetches (offsets 0 and 16), each in its own
;    fetch clause, each followed by its own MEM_RAT_CACHELESS store.
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
502define amdgpu_kernel void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
503; GCN-NOHSA-SI-LABEL: global_load_v16i16:
504; GCN-NOHSA-SI:       ; %bb.0: ; %entry
505; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
506; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
507; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
508; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
509; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
510; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
511; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
512; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
513; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
514; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0
515; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
516; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
517; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
518; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
519; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
520; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
521; GCN-NOHSA-SI-NEXT:    s_endpgm
522;
523; GCN-HSA-LABEL: global_load_v16i16:
524; GCN-HSA:       ; %bb.0: ; %entry
525; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
526; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
527; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
528; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
529; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
530; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
531; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
532; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
533; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
534; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
535; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
536; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
537; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s5
538; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
539; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s4
540; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
541; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
542; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
543; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
544; GCN-HSA-NEXT:    s_endpgm
545;
546; GCN-NOHSA-VI-LABEL: global_load_v16i16:
547; GCN-NOHSA-VI:       ; %bb.0: ; %entry
548; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
549; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
550; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
551; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
552; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
553; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
554; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
555; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
556; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
557; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
558; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
559; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
560; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
561; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
562; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
563; GCN-NOHSA-VI-NEXT:    s_endpgm
564;
565; EG-LABEL: global_load_v16i16:
566; EG:       ; %bb.0: ; %entry
567; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
568; EG-NEXT:    TEX 0 @8
569; EG-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
570; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
571; EG-NEXT:    TEX 0 @10
572; EG-NEXT:    ALU 3, @15, KC0[CB0:0-32], KC1[]
573; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
574; EG-NEXT:    CF_END
575; EG-NEXT:    Fetch clause starting at 8:
576; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 0, #1
577; EG-NEXT:    Fetch clause starting at 10:
578; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 16, #1
579; EG-NEXT:    ALU clause starting at 12:
580; EG-NEXT:     MOV * T0.X, KC0[2].Z,
581; EG-NEXT:    ALU clause starting at 13:
582; EG-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
583; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
584; EG-NEXT:    ALU clause starting at 15:
585; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
586; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
587; EG-NEXT:     LSHR * T1.X, PV.W, literal.x,
588; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
589;
590; CM-LABEL: global_load_v16i16:
591; CM:       ; %bb.0: ; %entry
592; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
593; CM-NEXT:    TEX 0 @8
594; CM-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
595; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
596; CM-NEXT:    TEX 0 @10
597; CM-NEXT:    ALU 3, @15, KC0[CB0:0-32], KC1[]
598; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
599; CM-NEXT:    CF_END
600; CM-NEXT:    Fetch clause starting at 8:
601; CM-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 0, #1
602; CM-NEXT:    Fetch clause starting at 10:
603; CM-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 16, #1
604; CM-NEXT:    ALU clause starting at 12:
605; CM-NEXT:     MOV * T0.X, KC0[2].Z,
606; CM-NEXT:    ALU clause starting at 13:
607; CM-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
608; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
609; CM-NEXT:    ALU clause starting at 15:
610; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
611; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
612; CM-NEXT:     LSHR * T1.X, PV.W, literal.x,
613; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
614entry:
615  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in
616  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
617  ret void
618}
619
620define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 {
621; GCN-NOHSA-SI-LABEL: global_load_v16i16_align2:
622; GCN-NOHSA-SI:       ; %bb.0: ; %entry
623; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
624; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
625; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
626; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
627; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
628; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
629; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
630; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
631; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
632; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
633; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
634; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:2
635; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v4, off, s[4:7], 0 offset:4
636; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v2, off, s[4:7], 0 offset:6
637; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v5, off, s[4:7], 0 offset:8
638; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v3, off, s[4:7], 0 offset:10
639; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v6, off, s[4:7], 0 offset:12
640; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v7, off, s[4:7], 0 offset:14
641; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v8, off, s[4:7], 0 offset:16
642; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v9, off, s[4:7], 0 offset:18
643; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v10, off, s[4:7], 0 offset:20
644; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v11, off, s[4:7], 0 offset:22
645; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v12, off, s[4:7], 0 offset:24
646; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v13, off, s[4:7], 0 offset:26
647; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v14, off, s[4:7], 0 offset:28
648; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v15, off, s[4:7], 0 offset:30
649; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(8)
650; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
651; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
652; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
653; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
654; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
655; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
656; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
657; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
658; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
659; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v3, v7, v6
660; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v2, v16, v5
661; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v1, v17, v4
662; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v0, v18, v0
663; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v7, v15, v14
664; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v6, v13, v12
665; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v5, v11, v10
666; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v4, v9, v8
667; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16
668; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
669; GCN-NOHSA-SI-NEXT:    s_endpgm
670;
671; GCN-HSA-LABEL: global_load_v16i16_align2:
672; GCN-HSA:       ; %bb.0: ; %entry
673; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
674; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
675; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
676; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
677; GCN-HSA-NEXT:    s_add_u32 s0, s0, 16
678; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
679; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
680; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
681; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
682; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
683; GCN-HSA-NEXT:    s_add_u32 s0, s2, 16
684; GCN-HSA-NEXT:    s_addc_u32 s1, s3, 0
685; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
686; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
687; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
688; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
689; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
690; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
691; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
692; GCN-HSA-NEXT:    s_endpgm
693;
694; GCN-NOHSA-VI-LABEL: global_load_v16i16_align2:
695; GCN-NOHSA-VI:       ; %bb.0: ; %entry
696; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
697; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
698; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
699; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
700; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
701; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
702; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
703; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
704; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:4
705; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
706; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:8
707; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:10
708; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:12
709; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:14
710; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:16
711; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:18
712; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:20
713; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0 offset:22
714; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:24
715; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:26
716; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:28
717; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:30
718; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s6
719; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s7
720; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(14)
721; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
722; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v0, v18, v0
723; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(12)
724; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
725; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v1, v17, v4
726; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(10)
727; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
728; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v2, v16, v5
729; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(8)
730; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
731; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v3, v7, v6
732; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
733; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
734; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v4, v9, v8
735; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
736; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
737; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v5, v11, v10
738; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
739; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
740; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v6, v13, v12
741; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
742; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
743; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v7, v15, v14
744; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
745; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
746; GCN-NOHSA-VI-NEXT:    s_endpgm
747;
748; EG-LABEL: global_load_v16i16_align2:
749; EG:       ; %bb.0: ; %entry
750; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
751; EG-NEXT:    TEX 1 @6
752; EG-NEXT:    ALU 4, @11, KC0[CB0:0-32], KC1[]
753; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
754; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
755; EG-NEXT:    CF_END
756; EG-NEXT:    Fetch clause starting at 6:
757; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
758; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
759; EG-NEXT:    ALU clause starting at 10:
760; EG-NEXT:     MOV * T0.X, KC0[2].Y,
761; EG-NEXT:    ALU clause starting at 11:
762; EG-NEXT:     LSHR T2.X, KC0[2].Z, literal.x,
763; EG-NEXT:     ADD_INT * T2.W, KC0[2].Z, literal.y,
764; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
765; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
766; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
767;
768; CM-LABEL: global_load_v16i16_align2:
769; CM:       ; %bb.0: ; %entry
770; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
771; CM-NEXT:    TEX 1 @6
772; CM-NEXT:    ALU 5, @11, KC0[CB0:0-32], KC1[]
773; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
774; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
775; CM-NEXT:    CF_END
776; CM-NEXT:    Fetch clause starting at 6:
777; CM-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
778; CM-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
779; CM-NEXT:    ALU clause starting at 10:
780; CM-NEXT:     MOV * T0.X, KC0[2].Y,
781; CM-NEXT:    ALU clause starting at 11:
782; CM-NEXT:     ADD_INT * T2.W, KC0[2].Z, literal.x,
783; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
784; CM-NEXT:     LSHR * T2.X, PV.W, literal.x,
785; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
786; CM-NEXT:     LSHR * T3.X, KC0[2].Z, literal.x,
787; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
788entry:
789  %ld =  load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2
790  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32
791  ret void
792}
793
; Zero-extending scalar load: reads one i16 from global memory (addrspace 1),
; zero-extends it to i32 and stores the i32 through %out.
; The GCN check sets expect an unsigned 16-bit load (buffer_load_ushort or
; flat_load_ushort) whose result is stored directly as a dword. The r600 check
; sets (EG and CM prefixes) expect a VTX_READ_16 fetch, and the only ALU
; instruction after the fetch is an LSHR by 2 of KC0[2].Y that produces the
; second operand of the store. No separate masking instruction is expected on
; any target.
794define amdgpu_kernel void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
795; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i32:
796; GCN-NOHSA-SI:       ; %bb.0:
797; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
798; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
799; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
800; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
801; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
802; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
803; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
804; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
805; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
806; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
807; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
808; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
809; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
810; GCN-NOHSA-SI-NEXT:    s_endpgm
811;
812; GCN-HSA-LABEL: global_zextload_i16_to_i32:
813; GCN-HSA:       ; %bb.0:
814; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
815; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
816; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
817; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
818; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
819; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
820; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
821; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
822; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
823; GCN-HSA-NEXT:    s_endpgm
824;
825; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i32:
826; GCN-NOHSA-VI:       ; %bb.0:
827; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
828; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
829; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
830; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
831; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
832; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
833; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
834; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
835; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
836; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
837; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
838; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
839; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
840; GCN-NOHSA-VI-NEXT:    s_endpgm
841;
842; EG-LABEL: global_zextload_i16_to_i32:
843; EG:       ; %bb.0:
844; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
845; EG-NEXT:    TEX 0 @6
846; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
847; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
848; EG-NEXT:    CF_END
849; EG-NEXT:    PAD
850; EG-NEXT:    Fetch clause starting at 6:
851; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
852; EG-NEXT:    ALU clause starting at 8:
853; EG-NEXT:     MOV * T0.X, KC0[2].Z,
854; EG-NEXT:    ALU clause starting at 9:
855; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
856; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
857;
858; CM-LABEL: global_zextload_i16_to_i32:
859; CM:       ; %bb.0:
860; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
861; CM-NEXT:    TEX 0 @6
862; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
863; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
864; CM-NEXT:    CF_END
865; CM-NEXT:    PAD
866; CM-NEXT:    Fetch clause starting at 6:
867; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
868; CM-NEXT:    ALU clause starting at 8:
869; CM-NEXT:     MOV * T0.X, KC0[2].Z,
870; CM-NEXT:    ALU clause starting at 9:
871; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
872; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
873  %a = load i16, i16 addrspace(1)* %in
874  %ext = zext i16 %a to i32
875  store i32 %ext, i32 addrspace(1)* %out
876  ret void
877}
878
; Sign-extending scalar load: reads one i16 from global memory (addrspace 1),
; sign-extends it to i32 and stores the i32 through %out.
; The GCN check sets expect a signed 16-bit load (buffer_load_sshort or
; flat_load_sshort) whose result is stored directly as a dword. The r600 check
; sets (EG and CM prefixes) expect a VTX_READ_16 fetch followed by a BFE_INT
; with offset 0 and width 16, i.e. the sign extension is a separate ALU
; instruction there.
879define amdgpu_kernel void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
880; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i32:
881; GCN-NOHSA-SI:       ; %bb.0:
882; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
883; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
884; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
885; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
886; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
887; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
888; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
889; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
890; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
891; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
892; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
893; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
894; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
895; GCN-NOHSA-SI-NEXT:    s_endpgm
896;
897; GCN-HSA-LABEL: global_sextload_i16_to_i32:
898; GCN-HSA:       ; %bb.0:
899; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
900; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
901; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
902; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
903; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
904; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
905; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
906; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
907; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
908; GCN-HSA-NEXT:    s_endpgm
909;
910; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i32:
911; GCN-NOHSA-VI:       ; %bb.0:
912; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
913; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
914; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
915; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
916; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
917; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
918; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
919; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
920; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
921; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
922; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[4:7], 0
923; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
924; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
925; GCN-NOHSA-VI-NEXT:    s_endpgm
926;
927; EG-LABEL: global_sextload_i16_to_i32:
928; EG:       ; %bb.0:
929; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
930; EG-NEXT:    TEX 0 @6
931; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
932; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
933; EG-NEXT:    CF_END
934; EG-NEXT:    PAD
935; EG-NEXT:    Fetch clause starting at 6:
936; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
937; EG-NEXT:    ALU clause starting at 8:
938; EG-NEXT:     MOV * T0.X, KC0[2].Z,
939; EG-NEXT:    ALU clause starting at 9:
940; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
941; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
942; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
943;
944; CM-LABEL: global_sextload_i16_to_i32:
945; CM:       ; %bb.0:
946; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
947; CM-NEXT:    TEX 0 @6
948; CM-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
949; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
950; CM-NEXT:    CF_END
951; CM-NEXT:    PAD
952; CM-NEXT:    Fetch clause starting at 6:
953; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
954; CM-NEXT:    ALU clause starting at 8:
955; CM-NEXT:     MOV * T0.X, KC0[2].Z,
956; CM-NEXT:    ALU clause starting at 9:
957; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
958; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
959; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
960; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
961  %a = load i16, i16 addrspace(1)* %in
962  %ext = sext i16 %a to i32
963  store i32 %ext, i32 addrspace(1)* %out
964  ret void
965}
966
; Single-element vector variant of the zero-extending load: reads a <1 x i16>
; from global memory (addrspace 1), zero-extends it to <1 x i32> and stores it
; through %out.
; The check sets expect the one-element vector to be handled like a scalar:
; an unsigned 16-bit load (buffer_load_ushort, flat_load_ushort or VTX_READ_16)
; followed by a single dword store, with no separate masking instruction.
967define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
968; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i32:
969; GCN-NOHSA-SI:       ; %bb.0:
970; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
971; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
972; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
973; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
974; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
975; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
976; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
977; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
978; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
979; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
980; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
981; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
982; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
983; GCN-NOHSA-SI-NEXT:    s_endpgm
984;
985; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i32:
986; GCN-HSA:       ; %bb.0:
987; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
988; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
989; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
990; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
991; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
992; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
993; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
994; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
995; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
996; GCN-HSA-NEXT:    s_endpgm
997;
998; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i32:
999; GCN-NOHSA-VI:       ; %bb.0:
1000; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1001; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1002; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1003; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1004; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1005; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1006; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
1007; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
1008; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
1009; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
1010; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
1011; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1012; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1013; GCN-NOHSA-VI-NEXT:    s_endpgm
1014;
1015; EG-LABEL: global_zextload_v1i16_to_v1i32:
1016; EG:       ; %bb.0:
1017; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1018; EG-NEXT:    TEX 0 @6
1019; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
1020; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1021; EG-NEXT:    CF_END
1022; EG-NEXT:    PAD
1023; EG-NEXT:    Fetch clause starting at 6:
1024; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1025; EG-NEXT:    ALU clause starting at 8:
1026; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1027; EG-NEXT:    ALU clause starting at 9:
1028; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1029; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1030;
1031; CM-LABEL: global_zextload_v1i16_to_v1i32:
1032; CM:       ; %bb.0:
1033; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1034; CM-NEXT:    TEX 0 @6
1035; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
1036; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
1037; CM-NEXT:    CF_END
1038; CM-NEXT:    PAD
1039; CM-NEXT:    Fetch clause starting at 6:
1040; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1041; CM-NEXT:    ALU clause starting at 8:
1042; CM-NEXT:     MOV * T0.X, KC0[2].Z,
1043; CM-NEXT:    ALU clause starting at 9:
1044; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1045; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1046  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
1047  %ext = zext <1 x i16> %load to <1 x i32>
1048  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
1049  ret void
1050}
1051
; Single-element vector variant of the sign-extending load: reads a <1 x i16>
; from global memory (addrspace 1), sign-extends it to <1 x i32> and stores it
; through %out.
; The GCN check sets expect a signed 16-bit load (buffer_load_sshort or
; flat_load_sshort) followed by a single dword store. The r600 check sets
; (EG and CM prefixes) expect VTX_READ_16 followed by a BFE_INT with offset 0
; and width 16 that performs the sign extension.
1052define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
1053; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i32:
1054; GCN-NOHSA-SI:       ; %bb.0:
1055; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1056; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1057; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1058; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1059; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1060; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1061; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1062; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1063; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
1064; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1065; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1066; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1067; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1068; GCN-NOHSA-SI-NEXT:    s_endpgm
1069;
1070; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i32:
1071; GCN-HSA:       ; %bb.0:
1072; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1073; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1074; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
1075; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
1076; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
1077; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1078; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1079; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1080; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
1081; GCN-HSA-NEXT:    s_endpgm
1082;
1083; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i32:
1084; GCN-NOHSA-VI:       ; %bb.0:
1085; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1086; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1087; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1088; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1089; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1090; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1091; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
1092; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
1093; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
1094; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
1095; GCN-NOHSA-VI-NEXT:    buffer_load_sshort v0, off, s[4:7], 0
1096; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1097; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1098; GCN-NOHSA-VI-NEXT:    s_endpgm
1099;
1100; EG-LABEL: global_sextload_v1i16_to_v1i32:
1101; EG:       ; %bb.0:
1102; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1103; EG-NEXT:    TEX 0 @6
1104; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
1105; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1106; EG-NEXT:    CF_END
1107; EG-NEXT:    PAD
1108; EG-NEXT:    Fetch clause starting at 6:
1109; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1110; EG-NEXT:    ALU clause starting at 8:
1111; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1112; EG-NEXT:    ALU clause starting at 9:
1113; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1114; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1115; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1116;
1117; CM-LABEL: global_sextload_v1i16_to_v1i32:
1118; CM:       ; %bb.0:
1119; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1120; CM-NEXT:    TEX 0 @6
1121; CM-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
1122; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
1123; CM-NEXT:    CF_END
1124; CM-NEXT:    PAD
1125; CM-NEXT:    Fetch clause starting at 6:
1126; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1127; CM-NEXT:    ALU clause starting at 8:
1128; CM-NEXT:     MOV * T0.X, KC0[2].Z,
1129; CM-NEXT:    ALU clause starting at 9:
1130; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
1131; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1132; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1133; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1134  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
1135  %ext = sext <1 x i16> %load to <1 x i32>
1136  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
1137  ret void
1138}
1139
; Two-element zero-extending load: reads a <2 x i16> from global memory
; (addrspace 1), zero-extends it to <2 x i32> and stores it through %out.
; Every check set expects both halves to be fetched with one 32-bit load
; (buffer_load_dword, flat_load_dword or VTX_READ_32) and then split in
; registers: the high element by a logical shift right of 16 and the low
; element by an AND with 0xffff (65535). The GCN check sets expect the two
; results to be written with one dwordx2 store.
1140define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
1141; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i32:
1142; GCN-NOHSA-SI:       ; %bb.0:
1143; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1144; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1145; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1146; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1147; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1148; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1149; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1150; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1151; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1152; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1153; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1154; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1155; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1156; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1157; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1158; GCN-NOHSA-SI-NEXT:    s_endpgm
1159;
1160; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i32:
1161; GCN-HSA:       ; %bb.0:
1162; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1163; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1164; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1165; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1166; GCN-HSA-NEXT:    flat_load_dword v2, v[0:1]
1167; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1168; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1169; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1170; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
1171; GCN-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1172; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1173; GCN-HSA-NEXT:    s_endpgm
1174;
1175; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i32:
1176; GCN-NOHSA-VI:       ; %bb.0:
1177; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1178; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1179; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1180; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
1181; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
1182; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1183; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
1184; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
1185; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1186; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1187; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1188; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1189; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1190; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1191; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1192; GCN-NOHSA-VI-NEXT:    s_endpgm
1193;
1194; EG-LABEL: global_zextload_v2i16_to_v2i32:
1195; EG:       ; %bb.0:
1196; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1197; EG-NEXT:    TEX 0 @6
1198; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
1199; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
1200; EG-NEXT:    CF_END
1201; EG-NEXT:    PAD
1202; EG-NEXT:    Fetch clause starting at 6:
1203; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1204; EG-NEXT:    ALU clause starting at 8:
1205; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1206; EG-NEXT:    ALU clause starting at 9:
1207; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
1208; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1209; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
1210; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
1211; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1212;
1213; CM-LABEL: global_zextload_v2i16_to_v2i32:
1214; CM:       ; %bb.0:
1215; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1216; CM-NEXT:    TEX 0 @6
1217; CM-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1218; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
1219; CM-NEXT:    CF_END
1220; CM-NEXT:    PAD
1221; CM-NEXT:    Fetch clause starting at 6:
1222; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1223; CM-NEXT:    ALU clause starting at 8:
1224; CM-NEXT:     MOV * T4.X, KC0[2].Z,
1225; CM-NEXT:    ALU clause starting at 9:
1226; CM-NEXT:     LSHR * T4.Y, T4.X, literal.x,
1227; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1228; CM-NEXT:     AND_INT * T4.X, T4.X, literal.x,
1229; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1230; CM-NEXT:     LSHR * T5.X, KC0[2].Y, literal.x,
1231; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1232  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
1233  %ext = zext <2 x i16> %load to <2 x i32>
1234  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
1235  ret void
1236}
1237
; Two-element sign-extending load: reads a <2 x i16> from global memory
; (addrspace 1), sign-extends it to <2 x i32> and stores it through %out.
; Every check set expects one 32-bit load of both halves. The GCN check sets
; expect the high element from an arithmetic shift right of 16
; (v_ashrrev_i32) and the low element from v_bfe_i32 with offset 0, width 16.
; The r600 check sets (EG and CM prefixes) currently pin a two-instruction
; sequence for the high element: LSHR by 16 followed by BFE_INT.
1238; TODO: On r600 the high element should use ASHR instead of LSHR + BFE_INT (the GCN output already uses an arithmetic shift).
1239define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
1240; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i32:
1241; GCN-NOHSA-SI:       ; %bb.0:
1242; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1243; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1244; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1245; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1246; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1247; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1248; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1249; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1250; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1251; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1252; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1253; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1254; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
1255; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
1256; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1257; GCN-NOHSA-SI-NEXT:    s_endpgm
1258;
1259; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i32:
1260; GCN-HSA:       ; %bb.0:
1261; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1262; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1263; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1264; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1265; GCN-HSA-NEXT:    flat_load_dword v2, v[0:1]
1266; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1267; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1268; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1269; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v2
1270; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
1271; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1272; GCN-HSA-NEXT:    s_endpgm
1273;
1274; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i32:
1275; GCN-NOHSA-VI:       ; %bb.0:
1276; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1277; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1278; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1279; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
1280; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
1281; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1282; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
1283; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
1284; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1285; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1286; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1287; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1288; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
1289; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
1290; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1291; GCN-NOHSA-VI-NEXT:    s_endpgm
1292;
1293; EG-LABEL: global_sextload_v2i16_to_v2i32:
1294; EG:       ; %bb.0:
1295; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1296; EG-NEXT:    TEX 0 @6
1297; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1298; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
1299; EG-NEXT:    CF_END
1300; EG-NEXT:    PAD
1301; EG-NEXT:    Fetch clause starting at 6:
1302; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1303; EG-NEXT:    ALU clause starting at 8:
1304; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1305; EG-NEXT:    ALU clause starting at 9:
1306; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1307; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
1308; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
1309; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1310; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
1311; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1312;
1313; CM-LABEL: global_sextload_v2i16_to_v2i32:
1314; CM:       ; %bb.0:
1315; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1316; CM-NEXT:    TEX 0 @6
1317; CM-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1318; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T4.X
1319; CM-NEXT:    CF_END
1320; CM-NEXT:    PAD
1321; CM-NEXT:    Fetch clause starting at 6:
1322; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1323; CM-NEXT:    ALU clause starting at 8:
1324; CM-NEXT:     MOV * T4.X, KC0[2].Z,
1325; CM-NEXT:    ALU clause starting at 9:
1326; CM-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1327; CM-NEXT:     LSHR * T0.W, T4.X, literal.x,
1328; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1329; CM-NEXT:     LSHR T4.X, KC0[2].Y, literal.x,
1330; CM-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.y,
1331; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1332  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
1333  %ext = sext <2 x i16> %load to <2 x i32>
1334  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
1335  ret void
1336}
1337
; Three-element zero-extending load: reads a <3 x i16> from global memory
; (addrspace 1), zero-extends it to <3 x i32> and stores it through %out.
; The GCN check sets expect one 64-bit load (dwordx2) of the source, the
; 0xffff mask materialised once in an SGPR and reused for elements 0 and 2,
; and element 1 produced by a logical shift right of 16. The store differs by
; target: the SI check set pins a dword store at offset 8 plus a dwordx2
; store, while the HSA and VI check sets pin a single dwordx3 store.
; The r600 check sets (EG and CM prefixes) instead expect three separate
; VTX_READ_16 fetches at byte offsets 0, 2 and 4 and two stores.
1338define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
1339; GCN-NOHSA-SI-LABEL: global_zextload_v3i16_to_v3i32:
1340; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1341; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1342; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1343; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1344; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1345; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1346; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1347; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1348; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1349; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1350; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
1351; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1352; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1353; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1354; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
1355; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, s2, v0
1356; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s2, v1
1357; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0 offset:8
1358; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[2:3], off, s[4:7], 0
1359; GCN-NOHSA-SI-NEXT:    s_endpgm
1360;
1361; GCN-HSA-LABEL: global_zextload_v3i16_to_v3i32:
1362; GCN-HSA:       ; %bb.0: ; %entry
1363; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1364; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1365; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1366; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1367; GCN-HSA-NEXT:    flat_load_dwordx2 v[3:4], v[0:1]
1368; GCN-HSA-NEXT:    s_mov_b32 s2, 0xffff
1369; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s0
1370; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
1371; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1372; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
1373; GCN-HSA-NEXT:    v_and_b32_e32 v2, s2, v4
1374; GCN-HSA-NEXT:    v_and_b32_e32 v0, s2, v3
1375; GCN-HSA-NEXT:    flat_store_dwordx3 v[5:6], v[0:2]
1376; GCN-HSA-NEXT:    s_endpgm
1377;
1378; GCN-NOHSA-VI-LABEL: global_zextload_v3i16_to_v3i32:
1379; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1380; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1381; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1382; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1383; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
1384; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
1385; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1386; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
1387; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
1388; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1389; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, 0xffff
1390; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1391; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1392; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1393; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, s6, v1
1394; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1395; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, s6, v0
1396; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
1397; GCN-NOHSA-VI-NEXT:    s_endpgm
1398;
1399; EG-LABEL: global_zextload_v3i16_to_v3i32:
1400; EG:       ; %bb.0: ; %entry
1401; EG-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1402; EG-NEXT:    TEX 2 @6
1403; EG-NEXT:    ALU 2, @17, KC0[], KC1[]
1404; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
1405; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
1406; EG-NEXT:    CF_END
1407; EG-NEXT:    Fetch clause starting at 6:
1408; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1409; EG-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1410; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1411; EG-NEXT:    ALU clause starting at 12:
1412; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
1413; EG-NEXT:     MOV * T1.X, KC0[2].Z,
1414; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1415; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1416; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1417; EG-NEXT:    ALU clause starting at 17:
1418; EG-NEXT:     LSHR T4.X, T0.W, literal.x,
1419; EG-NEXT:     MOV * T3.Y, T1.X,
1420; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1421;
1422; CM-LABEL: global_zextload_v3i16_to_v3i32:
1423; CM:       ; %bb.0: ; %entry
1424; CM-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1425; CM-NEXT:    TEX 2 @6
1426; CM-NEXT:    ALU 2, @17, KC0[CB0:0-32], KC1[]
1427; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T3, T4.X
1428; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
1429; CM-NEXT:    CF_END
1430; CM-NEXT:    Fetch clause starting at 6:
1431; CM-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1432; CM-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1433; CM-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1434; CM-NEXT:    ALU clause starting at 12:
1435; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1436; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1437; CM-NEXT:     LSHR * T0.X, PV.W, literal.x,
1438; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1439; CM-NEXT:     MOV * T1.X, KC0[2].Z,
1440; CM-NEXT:    ALU clause starting at 17:
1441; CM-NEXT:     LSHR T4.X, KC0[2].Y, literal.x,
1442; CM-NEXT:     MOV * T3.Y, T1.X,
1443; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1444entry:
1445  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
1446  %ext = zext <3 x i16> %ld to <3 x i32>
1447  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1448  ret void
1449}
1450
; Sign-extending global load of a 3-element i16 vector: loads <3 x i16> from
; %in, sign-extends every lane to i32 and stores the <3 x i32> result to %out.
; The SI checks expect the result to be written as one dword store at offset 8
; plus one dwordx2 store, whereas the HSA and VI checks expect a single
; dwordx3 store.
1451define amdgpu_kernel void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
1452; GCN-NOHSA-SI-LABEL: global_sextload_v3i16_to_v3i32:
1453; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1454; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1455; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1456; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1457; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1458; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1459; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1460; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1461; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1462; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1463; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1464; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1465; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1466; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v0
1467; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v0, 0, 16
1468; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v1, 0, 16
1469; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0 offset:8
1470; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[2:3], off, s[4:7], 0
1471; GCN-NOHSA-SI-NEXT:    s_endpgm
1472;
1473; GCN-HSA-LABEL: global_sextload_v3i16_to_v3i32:
1474; GCN-HSA:       ; %bb.0: ; %entry
1475; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1476; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1477; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1478; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1479; GCN-HSA-NEXT:    flat_load_dwordx2 v[3:4], v[0:1]
1480; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s0
1481; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
1482; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1483; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1484; GCN-HSA-NEXT:    v_bfe_i32 v2, v4, 0, 16
1485; GCN-HSA-NEXT:    v_bfe_i32 v0, v3, 0, 16
1486; GCN-HSA-NEXT:    flat_store_dwordx3 v[5:6], v[0:2]
1487; GCN-HSA-NEXT:    s_endpgm
1488;
1489; GCN-NOHSA-VI-LABEL: global_sextload_v3i16_to_v3i32:
1490; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1491; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1492; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1493; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1494; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
1495; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
1496; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1497; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
1498; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
1499; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[3:4], off, s[8:11], 0
1500; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1501; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1502; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1503; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1504; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v4, 0, 16
1505; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v3, 0, 16
1506; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
1507; GCN-NOHSA-VI-NEXT:    s_endpgm
1508;
1509; EG-LABEL: global_sextload_v3i16_to_v3i32:
1510; EG:       ; %bb.0: ; %entry
1511; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1512; EG-NEXT:    TEX 2 @6
1513; EG-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1514; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1515; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1516; EG-NEXT:    CF_END
1517; EG-NEXT:    Fetch clause starting at 6:
1518; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 2, #1
1519; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 4, #1
1520; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1521; EG-NEXT:    ALU clause starting at 12:
1522; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1523; EG-NEXT:    ALU clause starting at 13:
1524; EG-NEXT:     BFE_INT * T0.Y, T1.X, 0.0, literal.x,
1525; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1526; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1527; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1528; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1529; EG-NEXT:     BFE_INT T2.X, T2.X, 0.0, literal.x,
1530; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1531; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1532; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
1533; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1534;
1535; CM-LABEL: global_sextload_v3i16_to_v3i32:
1536; CM:       ; %bb.0: ; %entry
1537; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1538; CM-NEXT:    TEX 2 @6
1539; CM-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1540; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T2.X
1541; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T3.X
1542; CM-NEXT:    CF_END
1543; CM-NEXT:    Fetch clause starting at 6:
1544; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 4, #1
1545; CM-NEXT:     VTX_READ_16 T2.X, T0.X, 0, #1
1546; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 2, #1
1547; CM-NEXT:    ALU clause starting at 12:
1548; CM-NEXT:     MOV * T0.X, KC0[2].Z,
1549; CM-NEXT:    ALU clause starting at 13:
1550; CM-NEXT:     BFE_INT T1.X, T1.X, 0.0, literal.x,
1551; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1552; CM-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1553; CM-NEXT:     LSHR T3.X, PV.W, literal.x,
1554; CM-NEXT:     BFE_INT * T0.Y, T0.X, 0.0, literal.y,
1555; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1556; CM-NEXT:     BFE_INT * T0.X, T2.X, 0.0, literal.x,
1557; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1558; CM-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
1559; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1560entry:
1561  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
1562  %ext = sext <3 x i16> %ld to <3 x i32>
1563  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
1564  ret void
1565}
1566
1567; TODO: This should use DST, but for some reason there are redundant MOVs
; Zero-extending global load of a 4-element i16 vector: loads <4 x i16> from
; %in, zero-extends every lane to i32 and stores the <4 x i32> result to %out.
; The GCN checks expect one dwordx2 load, a right shift by 16 for result
; lanes 1 and 3, an AND with 0xffff for result lanes 0 and 2, and a single
; dwordx4 store.
1568define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
1569; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i32:
1570; GCN-NOHSA-SI:       ; %bb.0:
1571; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1572; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1573; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1574; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1575; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1576; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1577; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1578; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1579; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[4:5], off, s[8:11], 0
1580; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
1581; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1582; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1583; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1584; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
1585; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
1586; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, s2, v5
1587; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s2, v4
1588; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1589; GCN-NOHSA-SI-NEXT:    s_endpgm
1590;
1591; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i32:
1592; GCN-HSA:       ; %bb.0:
1593; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1594; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1595; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1596; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1597; GCN-HSA-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
1598; GCN-HSA-NEXT:    s_mov_b32 s2, 0xffff
1599; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s0
1600; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s1
1601; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1602; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
1603; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
1604; GCN-HSA-NEXT:    v_and_b32_e32 v2, s2, v5
1605; GCN-HSA-NEXT:    v_and_b32_e32 v0, s2, v4
1606; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
1607; GCN-HSA-NEXT:    s_endpgm
1608;
1609; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i32:
1610; GCN-NOHSA-VI:       ; %bb.0:
1611; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1612; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1613; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1614; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
1615; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
1616; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1617; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
1618; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
1619; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
1620; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, 0xffff
1621; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1622; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1623; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1624; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
1625; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, s6, v1
1626; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1627; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, s6, v0
1628; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1629; GCN-NOHSA-VI-NEXT:    s_endpgm
1630;
1631; EG-LABEL: global_zextload_v4i16_to_v4i32:
1632; EG:       ; %bb.0:
1633; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1634; EG-NEXT:    TEX 0 @6
1635; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
1636; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1637; EG-NEXT:    CF_END
1638; EG-NEXT:    PAD
1639; EG-NEXT:    Fetch clause starting at 6:
1640; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1641; EG-NEXT:    ALU clause starting at 8:
1642; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1643; EG-NEXT:    ALU clause starting at 9:
1644; EG-NEXT:     MOV T2.X, T5.X,
1645; EG-NEXT:     MOV * T3.X, T5.Y,
1646; EG-NEXT:     MOV T0.Y, PV.X,
1647; EG-NEXT:     MOV * T0.Z, PS,
1648; EG-NEXT:     LSHR * T5.W, PV.Z, literal.x,
1649; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1650; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
1651; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1652; EG-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
1653; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1654; EG-NEXT:     AND_INT T5.X, T0.Y, literal.x,
1655; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
1656; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1657;
1658; CM-LABEL: global_zextload_v4i16_to_v4i32:
1659; CM:       ; %bb.0:
1660; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1661; CM-NEXT:    TEX 0 @6
1662; CM-NEXT:    ALU 13, @9, KC0[CB0:0-32], KC1[]
1663; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
1664; CM-NEXT:    CF_END
1665; CM-NEXT:    PAD
1666; CM-NEXT:    Fetch clause starting at 6:
1667; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1668; CM-NEXT:    ALU clause starting at 8:
1669; CM-NEXT:     MOV * T5.X, KC0[2].Z,
1670; CM-NEXT:    ALU clause starting at 9:
1671; CM-NEXT:     MOV * T2.X, T5.X,
1672; CM-NEXT:     MOV T3.X, T5.Y,
1673; CM-NEXT:     MOV * T0.Y, PV.X,
1674; CM-NEXT:     MOV * T0.Z, PV.X,
1675; CM-NEXT:     LSHR * T5.W, PV.Z, literal.x,
1676; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1677; CM-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
1678; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1679; CM-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
1680; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1681; CM-NEXT:     AND_INT * T5.X, T0.Y, literal.x,
1682; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1683; CM-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
1684; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1685  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
1686  %ext = zext <4 x i16> %load to <4 x i32>
1687  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1688  ret void
1689}
1690
1691; TODO: We should use ASHR instead of LSHR + BFE
1692; TODO: This should use DST, but for some reason there are redundant MOVs
; Sign-extending global load of a 4-element i16 vector: loads <4 x i16> from
; %in, sign-extends every lane to i32 and stores the <4 x i32> result to %out.
; In the GCN checks the low half of each loaded dword is sign-extended with
; v_bfe_i32. The SI and HSA checks produce the last lane with v_ashr_i64 by 48
; followed by a v_mov_b32, while the VI checks use v_ashrrev_i32 by 16 for
; both high halves.
1693define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
1694; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i32:
1695; GCN-NOHSA-SI:       ; %bb.0:
1696; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1697; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1698; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1699; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1700; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1701; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1702; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1703; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1704; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[3:4], off, s[8:11], 0
1705; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1706; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1707; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1708; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1709; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[5:6], v[3:4], 48
1710; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v4, 0, 16
1711; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v3, 0, 16
1712; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v5
1713; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1714; GCN-NOHSA-SI-NEXT:    s_endpgm
1715;
1716; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i32:
1717; GCN-HSA:       ; %bb.0:
1718; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1719; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1720; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1721; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1722; GCN-HSA-NEXT:    flat_load_dwordx2 v[3:4], v[0:1]
1723; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s0
1724; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
1725; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1726; GCN-HSA-NEXT:    v_ashr_i64 v[7:8], v[3:4], 48
1727; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v3
1728; GCN-HSA-NEXT:    v_bfe_i32 v0, v3, 0, 16
1729; GCN-HSA-NEXT:    v_bfe_i32 v2, v4, 0, 16
1730; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v7
1731; GCN-HSA-NEXT:    flat_store_dwordx4 v[5:6], v[0:3]
1732; GCN-HSA-NEXT:    s_endpgm
1733;
1734; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i32:
1735; GCN-NOHSA-VI:       ; %bb.0:
1736; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1737; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1738; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1739; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
1740; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
1741; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1742; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
1743; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
1744; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[4:5], off, s[8:11], 0
1745; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1746; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1747; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1748; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
1749; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
1750; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v5, 0, 16
1751; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v4, 0, 16
1752; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1753; GCN-NOHSA-VI-NEXT:    s_endpgm
1754;
1755; EG-LABEL: global_sextload_v4i16_to_v4i32:
1756; EG:       ; %bb.0:
1757; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1758; EG-NEXT:    TEX 0 @6
1759; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
1760; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1761; EG-NEXT:    CF_END
1762; EG-NEXT:    PAD
1763; EG-NEXT:    Fetch clause starting at 6:
1764; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1765; EG-NEXT:    ALU clause starting at 8:
1766; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1767; EG-NEXT:    ALU clause starting at 9:
1768; EG-NEXT:     MOV T2.X, T5.X,
1769; EG-NEXT:     MOV * T3.X, T5.Y,
1770; EG-NEXT:     MOV T0.Y, PV.X,
1771; EG-NEXT:     MOV * T0.Z, PS,
1772; EG-NEXT:     BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
1773; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1774; EG-NEXT:     BFE_INT T5.X, T0.Y, 0.0, literal.x,
1775; EG-NEXT:     LSHR * T0.W, T0.Z, literal.x,
1776; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1777; EG-NEXT:     BFE_INT T5.W, PV.W, 0.0, literal.x,
1778; EG-NEXT:     LSHR * T0.W, T0.Y, literal.x,
1779; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1780; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
1781; EG-NEXT:     BFE_INT * T5.Y, PS, 0.0, literal.y,
1782; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1783;
1784; CM-LABEL: global_sextload_v4i16_to_v4i32:
1785; CM:       ; %bb.0:
1786; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1787; CM-NEXT:    TEX 0 @6
1788; CM-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
1789; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
1790; CM-NEXT:    CF_END
1791; CM-NEXT:    PAD
1792; CM-NEXT:    Fetch clause starting at 6:
1793; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1794; CM-NEXT:    ALU clause starting at 8:
1795; CM-NEXT:     MOV * T5.X, KC0[2].Z,
1796; CM-NEXT:    ALU clause starting at 9:
1797; CM-NEXT:     MOV * T2.X, T5.X,
1798; CM-NEXT:     MOV T3.X, T5.Y,
1799; CM-NEXT:     MOV * T0.Y, PV.X,
1800; CM-NEXT:     MOV * T0.Z, PV.X,
1801; CM-NEXT:     BFE_INT * T5.Z, PV.Z, 0.0, literal.x,
1802; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1803; CM-NEXT:     BFE_INT T5.X, T0.Y, 0.0, literal.x,
1804; CM-NEXT:     LSHR * T0.W, T0.Z, literal.x,
1805; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1806; CM-NEXT:     LSHR T0.Z, T0.Y, literal.x,
1807; CM-NEXT:     BFE_INT * T5.W, PV.W, 0.0, literal.x,
1808; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1809; CM-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
1810; CM-NEXT:     BFE_INT * T5.Y, PV.Z, 0.0, literal.y,
1811; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1812  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
1813  %ext = sext <4 x i16> %load to <4 x i32>
1814  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
1815  ret void
1816}
1817
1818; TODO: These should use LSHR instead of BFE_UINT
; Zero-extending global load of an 8-element i16 vector: loads <8 x i16> from
; %in, zero-extends every lane to i32 and stores the <8 x i32> result to %out.
; The GCN checks expect one dwordx4 load and two dwordx4 stores, one at
; offset 0 and one at offset 16 from %out.
; NOTE(review): the EG and CM checks in this function contain only LSHR,
; AND_INT and ADD_INT and no BFE_UINT, so the BFE_UINT TODO preceding this
; function looks stale -- confirm before removing it.
1819define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
1820; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i32:
1821; GCN-NOHSA-SI:       ; %bb.0:
1822; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1823; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1824; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1825; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1826; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1827; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1828; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1829; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1830; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1831; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
1832; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1833; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1834; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1835; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
1836; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1837; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
1838; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
1839; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, s2, v1
1840; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s2, v0
1841; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, s2, v3
1842; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, s2, v2
1843; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1844; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1845; GCN-NOHSA-SI-NEXT:    s_endpgm
1846;
1847; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i32:
1848; GCN-HSA:       ; %bb.0:
1849; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
1850; GCN-HSA-NEXT:    s_mov_b32 s4, 0xffff
1851; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1852; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1853; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1854; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
1855; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1856; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1857; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
1858; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
1859; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
1860; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
1861; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1862; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
1863; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
1864; GCN-HSA-NEXT:    v_and_b32_e32 v10, s4, v3
1865; GCN-HSA-NEXT:    v_and_b32_e32 v8, s4, v2
1866; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
1867; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1868; GCN-HSA-NEXT:    v_and_b32_e32 v6, s4, v1
1869; GCN-HSA-NEXT:    v_and_b32_e32 v4, s4, v0
1870; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
1871; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
1872; GCN-HSA-NEXT:    s_endpgm
1873;
1874; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i32:
1875; GCN-NOHSA-VI:       ; %bb.0:
1876; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
1877; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
1878; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
1879; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
1880; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
1881; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1882; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
1883; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
1884; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1885; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, 0xffff
1886; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
1887; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
1888; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1889; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
1890; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, s6, v3
1891; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
1892; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, s6, v2
1893; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
1894; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v6, s6, v1
1895; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1896; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, s6, v0
1897; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
1898; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
1899; GCN-NOHSA-VI-NEXT:    s_endpgm
1900;
1901; EG-LABEL: global_zextload_v8i16_to_v8i32:
1902; EG:       ; %bb.0:
1903; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1904; EG-NEXT:    TEX 0 @6
1905; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1906; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1907; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1908; EG-NEXT:    CF_END
1909; EG-NEXT:    Fetch clause starting at 6:
1910; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1911; EG-NEXT:    ALU clause starting at 8:
1912; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1913; EG-NEXT:    ALU clause starting at 9:
1914; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
1915; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1916; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
1917; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1918; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
1919; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
1920; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1921; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
1922; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
1923; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
1924; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1925; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
1926; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1927; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
1928; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1929; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1930; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
1931; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1932;
1933; CM-LABEL: global_zextload_v8i16_to_v8i32:
1934; CM:       ; %bb.0:
1935; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1936; CM-NEXT:    TEX 0 @6
1937; CM-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1938; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T10.X
1939; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T9.X
1940; CM-NEXT:    CF_END
1941; CM-NEXT:    Fetch clause starting at 6:
1942; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1943; CM-NEXT:    ALU clause starting at 8:
1944; CM-NEXT:     MOV * T7.X, KC0[2].Z,
1945; CM-NEXT:    ALU clause starting at 9:
1946; CM-NEXT:     LSHR * T8.W, T7.W, literal.x,
1947; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1948; CM-NEXT:     AND_INT * T8.Z, T7.W, literal.x,
1949; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1950; CM-NEXT:     LSHR T8.Y, T7.Z, literal.x,
1951; CM-NEXT:     LSHR * T7.W, T7.Y, literal.x,
1952; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1953; CM-NEXT:     AND_INT T8.X, T7.Z, literal.x,
1954; CM-NEXT:     AND_INT T7.Z, T7.Y, literal.x,
1955; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1956; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1957; CM-NEXT:     LSHR T9.X, PV.W, literal.x,
1958; CM-NEXT:     LSHR * T7.Y, T7.X, literal.y,
1959; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1960; CM-NEXT:     AND_INT * T7.X, T7.X, literal.x,
1961; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1962; CM-NEXT:     LSHR * T10.X, KC0[2].Y, literal.x,
1963; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1964  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
1965  %ext = zext <8 x i16> %load to <8 x i32>
1966  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
1967  ret void
1968}
1969
1970; TODO: These should use ASHR instead of LSHR + BFE_INT
; Sign-extending global load of an 8-element i16 vector: loads <8 x i16> from
; %in, sign-extends every lane to i32 and stores the <8 x i32> result to %out.
; The GCN checks expect v_bfe_i32 for the low half and v_ashrrev_i32 by 16 for
; the high half of each loaded dword, then two dwordx4 stores. The EG and CM
; checks produce the high halves with LSHR followed by BFE_INT, which is
; presumably the pattern the ASHR TODO preceding this function refers to.
1971define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
1972; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i32:
1973; GCN-NOHSA-SI:       ; %bb.0:
1974; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1975; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1976; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1977; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1978; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1979; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1980; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1981; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1982; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1983; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1984; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1985; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1986; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v1
1987; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v0
1988; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v1, 0, 16
1989; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v0, 0, 16
1990; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
1991; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
1992; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v3, 0, 16
1993; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v2, 0, 16
1994; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
1995; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1996; GCN-NOHSA-SI-NEXT:    s_endpgm
1997;
1998; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i32:
1999; GCN-HSA:       ; %bb.0:
2000; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2001; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2002; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
2003; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
2004; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2005; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2006; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2007; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
2008; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
2009; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
2010; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
2011; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
2012; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
2013; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
2014; GCN-HSA-NEXT:    v_bfe_i32 v10, v3, 0, 16
2015; GCN-HSA-NEXT:    v_bfe_i32 v8, v2, 0, 16
2016; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v1
2017; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v0
2018; GCN-HSA-NEXT:    v_bfe_i32 v6, v1, 0, 16
2019; GCN-HSA-NEXT:    v_bfe_i32 v4, v0, 0, 16
2020; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
2021; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
2022; GCN-HSA-NEXT:    s_endpgm
2023;
2024; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i32:
2025; GCN-NOHSA-VI:       ; %bb.0:
2026; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2027; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2028; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2029; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
2030; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
2031; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2032; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
2033; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
2034; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2035; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2036; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2037; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2038; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
2039; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
2040; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v3, 0, 16
2041; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v2, 0, 16
2042; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 16, v1
2043; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 16, v0
2044; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v1, 0, 16
2045; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v0, 0, 16
2046; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
2047; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
2048; GCN-NOHSA-VI-NEXT:    s_endpgm
2049;
2050; EG-LABEL: global_sextload_v8i16_to_v8i32:
2051; EG:       ; %bb.0:
2052; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
2053; EG-NEXT:    TEX 0 @6
2054; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
2055; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
2056; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
2057; EG-NEXT:    CF_END
2058; EG-NEXT:    Fetch clause starting at 6:
2059; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
2060; EG-NEXT:    ALU clause starting at 8:
2061; EG-NEXT:     MOV * T7.X, KC0[2].Z,
2062; EG-NEXT:    ALU clause starting at 9:
2063; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
2064; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2065; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
2066; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
2067; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
2068; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2069; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
2070; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
2071; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
2072; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
2073; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2074; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
2075; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
2076; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
2077; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
2078; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2079; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2080; EG-NEXT:     LSHR T10.X, PS, literal.x,
2081; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
2082; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2083;
2084; CM-LABEL: global_sextload_v8i16_to_v8i32:
2085; CM:       ; %bb.0:
2086; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
2087; CM-NEXT:    TEX 0 @6
2088; CM-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
2089; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
2090; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
2091; CM-NEXT:    CF_END
2092; CM-NEXT:    Fetch clause starting at 6:
2093; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
2094; CM-NEXT:    ALU clause starting at 8:
2095; CM-NEXT:     MOV * T7.X, KC0[2].Z,
2096; CM-NEXT:    ALU clause starting at 9:
2097; CM-NEXT:     BFE_INT * T8.Z, T7.W, 0.0, literal.x,
2098; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2099; CM-NEXT:     BFE_INT T8.X, T7.Z, 0.0, literal.x,
2100; CM-NEXT:     LSHR T0.Y, T7.Y, literal.x,
2101; CM-NEXT:     BFE_INT T9.Z, T7.Y, 0.0, literal.x,
2102; CM-NEXT:     LSHR * T0.W, T7.W, literal.x,
2103; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2104; CM-NEXT:     BFE_INT T9.X, T7.X, 0.0, literal.x,
2105; CM-NEXT:     LSHR T1.Y, T7.Z, literal.x,
2106; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.x,
2107; CM-NEXT:     BFE_INT * T8.W, PV.W, 0.0, literal.x,
2108; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2109; CM-NEXT:     LSHR T10.X, PV.Z, literal.x,
2110; CM-NEXT:     BFE_INT T8.Y, PV.Y, 0.0, literal.y,
2111; CM-NEXT:     LSHR T0.Z, T7.X, literal.y,
2112; CM-NEXT:     BFE_INT * T9.W, T0.Y, 0.0, literal.y,
2113; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2114; CM-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
2115; CM-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
2116; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2117  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
2118  %ext = sext <8 x i16> %load to <8 x i32>
2119  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
2120  ret void
2121}
2122
; Kernel under test - loads a <16 x i16> vector through the addrspace(1)
; pointer %in, zero-extends every element to i32 and stores the resulting
; <16 x i32> vector through the addrspace(1) pointer %out.
;
; The assertions inside the function are autogenerated (see the first line of
; this file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. Summary of what they currently pin down
;   - GCN-NOHSA-SI and GCN-NOHSA-VI expect two buffer_load_dwordx4 (offsets 0
;     and 16) and four buffer_store_dwordx4 (offsets 0, 16, 32 and 48).
;   - GCN-HSA expects two flat_load_dwordx4 and four flat_store_dwordx4 with
;     the addresses formed by s_add_u32 / s_addc_u32 pairs.
;   - On all three GCN prefixes each loaded dword is split into its halves with
;     v_and_b32 against a 0xffff mask held in an SGPR (low half) and
;     v_lshrrev_b32 by 16 (high half).
;   - EG and CM expect two VTX_READ_128 fetches, AND_INT with 65535 and LSHR by
;     16 for the two halves, and four MEM_RAT_CACHELESS stores.
2123define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
2124; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i32:
2125; GCN-NOHSA-SI:       ; %bb.0:
2126; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
2127; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
2128; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
2129; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
2130; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
2131; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2132; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
2133; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
2134; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2135; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
2136; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
2137; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
2138; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2139; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
2140; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
2141; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
2142; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
2143; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
2144; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
2145; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
2146; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v4
2147; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, s2, v1
2148; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, s2, v0
2149; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v14, s2, v3
2150; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, s2, v2
2151; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v7
2152; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
2153; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, s2, v5
2154; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, s2, v4
2155; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, s2, v7
2156; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s2, v6
2157; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:48
2158; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:32
2159; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:16
2160; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2161; GCN-NOHSA-SI-NEXT:    s_endpgm
2162;
2163; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i32:
2164; GCN-HSA:       ; %bb.0:
2165; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2166; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2167; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
2168; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2169; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
2170; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2171; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2172; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
2173; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2174; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
2175; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2176; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2177; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s3
2178; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s2
2179; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2180; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2181; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s1
2182; GCN-HSA-NEXT:    s_mov_b32 s4, 0xffff
2183; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s0
2184; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
2185; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
2186; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
2187; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
2188; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
2189; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v3
2190; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
2191; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v10, 16, v7
2192; GCN-HSA-NEXT:    v_and_b32_e32 v9, s4, v7
2193; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
2194; GCN-HSA-NEXT:    v_and_b32_e32 v7, s4, v6
2195; GCN-HSA-NEXT:    flat_store_dwordx4 v[11:12], v[7:10]
2196; GCN-HSA-NEXT:    v_and_b32_e32 v12, s4, v3
2197; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
2198; GCN-HSA-NEXT:    v_and_b32_e32 v8, s4, v1
2199; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
2200; GCN-HSA-NEXT:    v_and_b32_e32 v6, s4, v0
2201; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
2202; GCN-HSA-NEXT:    v_and_b32_e32 v10, s4, v2
2203; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
2204; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
2205; GCN-HSA-NEXT:    v_and_b32_e32 v2, s4, v5
2206; GCN-HSA-NEXT:    v_and_b32_e32 v0, s4, v4
2207; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2208; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2209; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[0:3]
2210; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[10:13]
2211; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[6:9]
2212; GCN-HSA-NEXT:    s_endpgm
2213;
2214; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i32:
2215; GCN-NOHSA-VI:       ; %bb.0:
2216; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2217; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2218; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2219; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
2220; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
2221; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2222; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
2223; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
2224; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2225; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2226; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, 0xffff
2227; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2228; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2229; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
2230; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
2231; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2232; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v19, 16, v7
2233; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v18, s6, v7
2234; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v6
2235; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, s6, v6
2236; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, s6, v1
2237; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
2238; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, s6, v0
2239; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
2240; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v14, s6, v3
2241; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
2242; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, s6, v2
2243; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
2244; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, s6, v5
2245; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
2246; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, s6, v4
2247; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
2248; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2249; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2250; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2251; GCN-NOHSA-VI-NEXT:    s_endpgm
2252;
2253; EG-LABEL: global_zextload_v16i16_to_v16i32:
2254; EG:       ; %bb.0:
2255; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2256; EG-NEXT:    TEX 1 @8
2257; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
2258; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
2259; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
2260; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
2261; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
2262; EG-NEXT:    CF_END
2263; EG-NEXT:    Fetch clause starting at 8:
2264; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
2265; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
2266; EG-NEXT:    ALU clause starting at 12:
2267; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2268; EG-NEXT:    ALU clause starting at 13:
2269; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
2270; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2271; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
2272; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2273; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
2274; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
2275; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2276; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
2277; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
2278; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
2279; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
2280; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
2281; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
2282; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2283; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
2284; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
2285; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2286; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2287; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
2288; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
2289; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
2290; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
2291; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2292; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2293; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
2294; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2295; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2296; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
2297; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
2298; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
2299; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2300; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2301; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2302; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2303; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
2304; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2305;
2306; CM-LABEL: global_zextload_v16i16_to_v16i32:
2307; CM:       ; %bb.0:
2308; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2309; CM-NEXT:    TEX 1 @8
2310; CM-NEXT:    ALU 33, @13, KC0[CB0:0-32], KC1[]
2311; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T18.X
2312; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T17.X
2313; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T16.X
2314; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T14.X
2315; CM-NEXT:    CF_END
2316; CM-NEXT:    Fetch clause starting at 8:
2317; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2318; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2319; CM-NEXT:    ALU clause starting at 12:
2320; CM-NEXT:     MOV * T11.X, KC0[2].Z,
2321; CM-NEXT:    ALU clause starting at 13:
2322; CM-NEXT:     LSHR * T13.W, T12.W, literal.x,
2323; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2324; CM-NEXT:     AND_INT * T13.Z, T12.W, literal.x,
2325; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2326; CM-NEXT:     LSHR T13.Y, T12.Z, literal.x,
2327; CM-NEXT:     LSHR * T12.W, T12.Y, literal.x,
2328; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2329; CM-NEXT:     AND_INT T13.X, T12.Z, literal.x,
2330; CM-NEXT:     AND_INT T12.Z, T12.Y, literal.x,
2331; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2332; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2333; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
2334; CM-NEXT:     LSHR T12.Y, T12.X, literal.y,
2335; CM-NEXT:     LSHR * T15.W, T11.W, literal.y,
2336; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2337; CM-NEXT:     AND_INT T12.X, T12.X, literal.x,
2338; CM-NEXT:     AND_INT T15.Z, T11.W, literal.x,
2339; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2340; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2341; CM-NEXT:     LSHR T16.X, PV.W, literal.x,
2342; CM-NEXT:     LSHR T15.Y, T11.Z, literal.y,
2343; CM-NEXT:     LSHR * T11.W, T11.Y, literal.y,
2344; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2345; CM-NEXT:     AND_INT T15.X, T11.Z, literal.x,
2346; CM-NEXT:     AND_INT T11.Z, T11.Y, literal.x,
2347; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2348; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2349; CM-NEXT:     LSHR T17.X, PV.W, literal.x,
2350; CM-NEXT:     LSHR * T11.Y, T11.X, literal.y,
2351; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2352; CM-NEXT:     AND_INT * T11.X, T11.X, literal.x,
2353; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2354; CM-NEXT:     LSHR * T18.X, KC0[2].Y, literal.x,
2355; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test - a single vector load, a zext of all sixteen i16 elements to
; i32, and a single vector store of the widened result.
2356  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
2357  %ext = zext <16 x i16> %load to <16 x i32>
2358  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
2359  ret void
2360}
2361
; Kernel under test - loads a <16 x i16> vector through the addrspace(1)
; pointer %in, sign-extends every element to i32 and stores the resulting
; <16 x i32> vector through the addrspace(1) pointer %out.
;
; The assertions inside the function are autogenerated (see the first line of
; this file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. Summary of what they currently pin down
;   - GCN-NOHSA-SI and GCN-NOHSA-VI expect two buffer_load_dwordx4 (offsets 0
;     and 16) and four buffer_store_dwordx4 (offsets 0, 16, 32 and 48).
;   - GCN-HSA expects two flat_load_dwordx4 and four flat_store_dwordx4 with
;     the addresses formed by s_add_u32 / s_addc_u32 pairs.
;   - On all three GCN prefixes each loaded dword is split into its halves with
;     v_bfe_i32 (offset 0, width 16) for the low half and v_ashrrev_i32 by 16
;     for the high half, so no 0xffff mask register is expected here.
;   - EG and CM expect two VTX_READ_128 fetches, BFE_INT with a literal 16 for
;     the low half, LSHR by 16 followed by BFE_INT for the high half, and four
;     MEM_RAT_CACHELESS stores.
2362define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
2363; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i32:
2364; GCN-NOHSA-SI:       ; %bb.0:
2365; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
2366; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
2367; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
2368; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
2369; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
2370; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2371; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
2372; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
2373; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2374; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
2375; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
2376; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2377; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
2378; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v1
2379; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v0
2380; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v1, 0, 16
2381; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v0, 0, 16
2382; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v15, 16, v3
2383; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 16, v2
2384; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v14, v3, 0, 16
2385; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v2, 0, 16
2386; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
2387; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
2388; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
2389; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v5, 0, 16
2390; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v4, 0, 16
2391; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 16, v7
2392; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 16, v6
2393; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v7, 0, 16
2394; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v6, 0, 16
2395; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:48
2396; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:32
2397; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:16
2398; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2399; GCN-NOHSA-SI-NEXT:    s_endpgm
2400;
2401; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i32:
2402; GCN-HSA:       ; %bb.0:
2403; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2404; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2405; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
2406; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
2407; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
2408; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
2409; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2410; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2411; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2412; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
2413; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2414; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2415; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
2416; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
2417; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2418; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2419; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
2420; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
2421; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
2422; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
2423; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
2424; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s1
2425; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
2426; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s0
2427; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
2428; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v0
2429; GCN-HSA-NEXT:    v_bfe_i32 v8, v0, 0, 16
2430; GCN-HSA-NEXT:    v_bfe_i32 v10, v1, 0, 16
2431; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v15, 16, v3
2432; GCN-HSA-NEXT:    v_bfe_i32 v14, v3, 0, 16
2433; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v13, 16, v2
2434; GCN-HSA-NEXT:    v_bfe_i32 v12, v2, 0, 16
2435; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v1
2436; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
2437; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
2438; GCN-HSA-NEXT:    s_waitcnt vmcnt(2)
2439; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
2440; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v10, 16, v7
2441; GCN-HSA-NEXT:    v_bfe_i32 v9, v7, 0, 16
2442; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v8, 16, v6
2443; GCN-HSA-NEXT:    v_bfe_i32 v7, v6, 0, 16
2444; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
2445; GCN-HSA-NEXT:    v_bfe_i32 v2, v5, 0, 16
2446; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
2447; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[7:10]
2448; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[0:3]
2449; GCN-HSA-NEXT:    s_endpgm
2450;
2451; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i32:
2452; GCN-NOHSA-VI:       ; %bb.0:
2453; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2454; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2455; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2456; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
2457; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
2458; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2459; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
2460; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
2461; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2462; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2463; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2464; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2465; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
2466; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 16, v1
2467; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2468; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 16, v7
2469; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 16, v6
2470; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v7, 0, 16
2471; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v6, 0, 16
2472; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v1, 0, 16
2473; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 16, v0
2474; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v0, 0, 16
2475; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 16, v3
2476; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v3, 0, 16
2477; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 16, v2
2478; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v2, 0, 16
2479; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
2480; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
2481; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v5, 0, 16
2482; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v4, 0, 16
2483; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
2484; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2485; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
2486; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
2487; GCN-NOHSA-VI-NEXT:    s_endpgm
2488;
2489; EG-LABEL: global_sextload_v16i16_to_v16i32:
2490; EG:       ; %bb.0:
2491; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2492; EG-NEXT:    TEX 1 @8
2493; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
2494; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
2495; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
2496; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
2497; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
2498; EG-NEXT:    CF_END
2499; EG-NEXT:    Fetch clause starting at 8:
2500; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2501; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2502; EG-NEXT:    ALU clause starting at 12:
2503; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2504; EG-NEXT:    ALU clause starting at 13:
2505; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
2506; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2507; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2508; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
2509; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
2510; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2511; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
2512; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
2513; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
2514; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
2515; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
2516; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2517; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
2518; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
2519; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
2520; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
2521; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
2522; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2523; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
2524; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
2525; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
2526; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
2527; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
2528; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2529; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
2530; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
2531; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
2532; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
2533; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2534; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
2535; EG-NEXT:     LSHR T11.X, PS, literal.x,
2536; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
2537; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
2538; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
2539; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2540; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2541; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2542; EG-NEXT:     LSHR T12.X, PS, literal.x,
2543; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
2544; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2545;
2546; CM-LABEL: global_sextload_v16i16_to_v16i32:
2547; CM:       ; %bb.0:
2548; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2549; CM-NEXT:    TEX 1 @8
2550; CM-NEXT:    ALU 40, @13, KC0[CB0:0-32], KC1[]
2551; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T17, T11.X
2552; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T18.X
2553; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T16, T14.X
2554; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T13.X
2555; CM-NEXT:    CF_END
2556; CM-NEXT:    Fetch clause starting at 8:
2557; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2558; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2559; CM-NEXT:    ALU clause starting at 12:
2560; CM-NEXT:     MOV * T11.X, KC0[2].Z,
2561; CM-NEXT:    ALU clause starting at 13:
2562; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2563; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2564; CM-NEXT:     LSHR T13.X, PV.W, literal.x,
2565; CM-NEXT:     LSHR T0.Y, T11.Y, literal.y,
2566; CM-NEXT:     LSHR T0.Z, T11.Z, literal.y,
2567; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2568; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2569; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
2570; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
2571; CM-NEXT:     LSHR T1.Y, T11.W, literal.y,
2572; CM-NEXT:     BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212
2573; CM-NEXT:     LSHR * T0.W, T12.X, literal.y,
2574; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2575; CM-NEXT:     BFE_INT T15.X, T12.Z, 0.0, literal.x,
2576; CM-NEXT:     LSHR T2.Y, T12.Y, literal.x,
2577; CM-NEXT:     BFE_INT T16.Z, T12.Y, 0.0, literal.x,
2578; CM-NEXT:     LSHR * T1.W, T12.W, literal.x,
2579; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2580; CM-NEXT:     BFE_INT T16.X, T12.X, 0.0, literal.x,
2581; CM-NEXT:     LSHR T3.Y, T12.Z, literal.x,
2582; CM-NEXT:     BFE_INT T12.Z, T11.W, 0.0, literal.x,
2583; CM-NEXT:     BFE_INT * T15.W, PV.W, 0.0, literal.x,
2584; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2585; CM-NEXT:     BFE_INT T12.X, T11.Z, 0.0, literal.x,
2586; CM-NEXT:     BFE_INT T15.Y, PV.Y, 0.0, literal.x,
2587; CM-NEXT:     BFE_INT T17.Z, T11.Y, 0.0, literal.x,
2588; CM-NEXT:     BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
2589; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2590; CM-NEXT:     BFE_INT T17.X, T11.X, 0.0, literal.x,
2591; CM-NEXT:     BFE_INT T16.Y, T0.W, 0.0, literal.x,
2592; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.x,
2593; CM-NEXT:     BFE_INT * T12.W, T1.Y, 0.0, literal.x,
2594; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2595; CM-NEXT:     LSHR T18.X, PV.Z, literal.x,
2596; CM-NEXT:     BFE_INT T12.Y, T0.Z, 0.0, literal.y,
2597; CM-NEXT:     LSHR T0.Z, T11.X, literal.y,
2598; CM-NEXT:     BFE_INT * T17.W, T0.Y, 0.0, literal.y,
2599; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2600; CM-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
2601; CM-NEXT:     BFE_INT * T17.Y, PV.Z, 0.0, literal.y,
2602; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; IR under test - a single vector load, a sext of all sixteen i16 elements to
; i32, and a single vector store of the widened result.
2603  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
2604  %ext = sext <16 x i16> %load to <16 x i32>
2605  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
2606  ret void
2607}
2608
2609define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
2610; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i32:
2611; GCN-NOHSA-SI:       ; %bb.0:
2612; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
2613; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2614; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2615; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, 0xffff
2616; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
2617; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
2618; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2619; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
2620; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
2621; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2622; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2623; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
2624; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
2625; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
2626; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
2627; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v2
2628; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v23, 16, v1
2629; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v0
2630; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(2)
2631; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v27, 16, v7
2632; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v6
2633; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, s0, v3
2634; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, s0, v2
2635; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v22, s0, v1
2636; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v20, s0, v0
2637; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
2638; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
2639; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, s0, v7
2640; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v24, s0, v6
2641; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, s0, v5
2642; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s0, v4
2643; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
2644; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v11
2645; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v10
2646; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v31, 16, v9
2647; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v29, 16, v8
2648; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, s0, v11
2649; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s0, v10
2650; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v30, s0, v9
2651; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v28, s0, v8
2652; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
2653; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v15
2654; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v14
2655; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v35, 16, v13
2656; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v33, 16, v12
2657; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, s0, v15
2658; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, s0, v14
2659; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v34, s0, v13
2660; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v32, s0, v12
2661; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
2662; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
2663; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
2664; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
2665; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
2666; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
2667; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2668; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
2669; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
2670; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
2671; GCN-NOHSA-SI-NEXT:    s_endpgm
2672;
2673; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i32:
2674; GCN-HSA:       ; %bb.0:
2675; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
2676; GCN-HSA-NEXT:    s_mov_b32 s14, 0xffff
2677; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2678; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
2679; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
2680; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2681; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
2682; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
2683; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
2684; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
2685; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
2686; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
2687; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2688; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
2689; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
2690; GCN-HSA-NEXT:    s_add_u32 s2, s2, 48
2691; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
2692; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
2693; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
2694; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
2695; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
2696; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2697; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2698; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x60
2699; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
2700; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0x70
2701; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
2702; GCN-HSA-NEXT:    s_add_u32 s8, s0, 64
2703; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
2704; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0x50
2705; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
2706; GCN-HSA-NEXT:    s_add_u32 s12, s0, 32
2707; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
2708; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s7
2709; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s6
2710; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
2711; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v1
2712; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
2713; GCN-HSA-NEXT:    v_and_b32_e32 v18, s14, v1
2714; GCN-HSA-NEXT:    v_and_b32_e32 v16, s14, v0
2715; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
2716; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
2717; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
2718; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2719; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
2720; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
2721; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v4
2722; GCN-HSA-NEXT:    v_and_b32_e32 v18, s14, v5
2723; GCN-HSA-NEXT:    v_and_b32_e32 v16, s14, v4
2724; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
2725; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
2726; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
2727; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
2728; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v19, 16, v7
2729; GCN-HSA-NEXT:    v_and_b32_e32 v18, s14, v7
2730; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v6
2731; GCN-HSA-NEXT:    v_and_b32_e32 v16, s14, v6
2732; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
2733; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[16:19]
2734; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
2735; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
2736; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s3
2737; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
2738; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v13
2739; GCN-HSA-NEXT:    v_and_b32_e32 v6, s14, v13
2740; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v12
2741; GCN-HSA-NEXT:    v_and_b32_e32 v4, s14, v12
2742; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
2743; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2744; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s2
2745; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v18, 16, v15
2746; GCN-HSA-NEXT:    v_and_b32_e32 v17, s14, v15
2747; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v16, 16, v14
2748; GCN-HSA-NEXT:    v_and_b32_e32 v15, s14, v14
2749; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
2750; GCN-HSA-NEXT:    flat_store_dwordx4 v[19:20], v[15:18]
2751; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
2752; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v18, 16, v9
2753; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v16, 16, v8
2754; GCN-HSA-NEXT:    v_and_b32_e32 v15, s14, v8
2755; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s1
2756; GCN-HSA-NEXT:    v_and_b32_e32 v17, s14, v9
2757; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
2758; GCN-HSA-NEXT:    v_and_b32_e32 v5, s14, v3
2759; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
2760; GCN-HSA-NEXT:    v_and_b32_e32 v3, s14, v2
2761; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v14, 16, v11
2762; GCN-HSA-NEXT:    v_and_b32_e32 v13, s14, v11
2763; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v12, 16, v10
2764; GCN-HSA-NEXT:    v_and_b32_e32 v11, s14, v10
2765; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s0
2766; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[15:18]
2767; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[11:14]
2768; GCN-HSA-NEXT:    flat_store_dwordx4 v[7:8], v[3:6]
2769; GCN-HSA-NEXT:    s_endpgm
2770;
2771; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i32:
2772; GCN-NOHSA-VI:       ; %bb.0:
2773; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
2774; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
2775; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
2776; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
2777; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
2778; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2779; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
2780; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
2781; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
2782; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
2783; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
2784; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
2785; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, 0xffff
2786; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
2787; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
2788; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v19, 16, v3
2789; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v18, s0, v3
2790; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v2
2791; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, s0, v2
2792; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
2793; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, s0, v1
2794; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
2795; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, s0, v0
2796; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
2797; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v23, 16, v7
2798; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v22, s0, v7
2799; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v21, 16, v6
2800; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, s0, v6
2801; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v5
2802; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v6, s0, v5
2803; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
2804; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, s0, v4
2805; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
2806; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v27, 16, v11
2807; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v26, s0, v11
2808; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v25, 16, v10
2809; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, s0, v10
2810; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
2811; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, s0, v9
2812; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
2813; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, s0, v8
2814; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
2815; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v31, 16, v15
2816; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v30, s0, v15
2817; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v29, 16, v14
2818; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v28, s0, v14
2819; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v13
2820; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v14, s0, v13
2821; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v12
2822; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, s0, v12
2823; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
2824; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:96
2825; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112
2826; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
2827; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80
2828; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
2829; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48
2830; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2831; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
2832; GCN-NOHSA-VI-NEXT:    s_endpgm
2833;
2834; EG-LABEL: global_zextload_v32i16_to_v32i32:
2835; EG:       ; %bb.0:
2836; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2837; EG-NEXT:    TEX 3 @12
2838; EG-NEXT:    ALU 72, @21, KC0[CB0:0-32], KC1[]
2839; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T34.X, 0
2840; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T33.X, 0
2841; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
2842; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T30.X, 0
2843; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T29.X, 0
2844; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
2845; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
2846; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T24.X, 1
2847; EG-NEXT:    CF_END
2848; EG-NEXT:    Fetch clause starting at 12:
2849; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
2850; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
2851; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
2852; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
2853; EG-NEXT:    ALU clause starting at 20:
2854; EG-NEXT:     MOV * T19.X, KC0[2].Z,
2855; EG-NEXT:    ALU clause starting at 21:
2856; EG-NEXT:     LSHR * T23.W, T20.W, literal.x,
2857; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2858; EG-NEXT:     AND_INT * T23.Z, T20.W, literal.x,
2859; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2860; EG-NEXT:     LSHR T23.Y, T20.Z, literal.x,
2861; EG-NEXT:     LSHR * T20.W, T20.Y, literal.x,
2862; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2863; EG-NEXT:     AND_INT T23.X, T20.Z, literal.x,
2864; EG-NEXT:     AND_INT T20.Z, T20.Y, literal.x,
2865; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2866; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2867; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
2868; EG-NEXT:     LSHR T20.Y, T20.X, literal.y,
2869; EG-NEXT:     LSHR T25.W, T19.W, literal.y,
2870; EG-NEXT:     AND_INT * T20.X, T20.X, literal.z,
2871; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2872; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2873; EG-NEXT:     AND_INT * T25.Z, T19.W, literal.x,
2874; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2875; EG-NEXT:     LSHR T26.X, KC0[2].Y, literal.x,
2876; EG-NEXT:     LSHR T25.Y, T19.Z, literal.y,
2877; EG-NEXT:     LSHR T19.W, T19.Y, literal.y,
2878; EG-NEXT:     AND_INT * T25.X, T19.Z, literal.z,
2879; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2880; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2881; EG-NEXT:     AND_INT T19.Z, T19.Y, literal.x,
2882; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2883; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2884; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
2885; EG-NEXT:     LSHR T19.Y, T19.X, literal.y,
2886; EG-NEXT:     LSHR T28.W, T22.W, literal.y,
2887; EG-NEXT:     AND_INT * T19.X, T19.X, literal.z,
2888; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2889; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2890; EG-NEXT:     AND_INT T28.Z, T22.W, literal.x,
2891; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2892; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2893; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
2894; EG-NEXT:     LSHR T28.Y, T22.Z, literal.y,
2895; EG-NEXT:     LSHR T22.W, T22.Y, literal.y,
2896; EG-NEXT:     AND_INT * T28.X, T22.Z, literal.z,
2897; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2898; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2899; EG-NEXT:     AND_INT T22.Z, T22.Y, literal.x,
2900; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2901; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2902; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
2903; EG-NEXT:     LSHR T22.Y, T22.X, literal.y,
2904; EG-NEXT:     LSHR T31.W, T21.W, literal.y,
2905; EG-NEXT:     AND_INT * T22.X, T22.X, literal.z,
2906; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2907; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2908; EG-NEXT:     AND_INT T31.Z, T21.W, literal.x,
2909; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2910; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2911; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
2912; EG-NEXT:     LSHR T31.Y, T21.Z, literal.y,
2913; EG-NEXT:     LSHR T21.W, T21.Y, literal.y,
2914; EG-NEXT:     AND_INT * T31.X, T21.Z, literal.z,
2915; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2916; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2917; EG-NEXT:     AND_INT T21.Z, T21.Y, literal.x,
2918; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2919; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
2920; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
2921; EG-NEXT:     LSHR T21.Y, T21.X, literal.y,
2922; EG-NEXT:     AND_INT * T21.X, T21.X, literal.z,
2923; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2924; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2925; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2926; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
2927; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
2928; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2929;
2930; CM-LABEL: global_zextload_v32i16_to_v32i32:
2931; CM:       ; %bb.0:
2932; CM-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2933; CM-NEXT:    TEX 3 @12
2934; CM-NEXT:    ALU 65, @21, KC0[CB0:0-32], KC1[]
2935; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T33, T34.X
2936; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T21.X
2937; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T30, T32.X
2938; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T28, T22.X
2939; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T27, T29.X
2940; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T19.X
2941; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T24, T26.X
2942; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T20.X
2943; CM-NEXT:    CF_END
2944; CM-NEXT:    Fetch clause starting at 12:
2945; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
2946; CM-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 0, #1
2947; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 16, #1
2948; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 32, #1
2949; CM-NEXT:    ALU clause starting at 20:
2950; CM-NEXT:     MOV * T19.X, KC0[2].Z,
2951; CM-NEXT:    ALU clause starting at 21:
2952; CM-NEXT:     LSHR * T23.W, T20.Y, literal.x,
2953; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2954; CM-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
2955; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2956; CM-NEXT:     LSHR T23.Y, T20.X, literal.x,
2957; CM-NEXT:     LSHR * T24.W, T20.W, literal.x,
2958; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2959; CM-NEXT:     AND_INT T23.X, T20.X, literal.x,
2960; CM-NEXT:     AND_INT T24.Z, T20.W, literal.x,
2961; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2962; CM-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
2963; CM-NEXT:     LSHR T20.X, PV.W, literal.x,
2964; CM-NEXT:     LSHR T24.Y, T20.Z, literal.y,
2965; CM-NEXT:     LSHR * T25.W, T19.Y, literal.y,
2966; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2967; CM-NEXT:     AND_INT T24.X, T20.Z, literal.x,
2968; CM-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
2969; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2970; CM-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
2971; CM-NEXT:     LSHR T26.X, PV.W, literal.x,
2972; CM-NEXT:     LSHR T25.Y, T19.X, literal.y,
2973; CM-NEXT:     LSHR * T27.W, T19.W, literal.y,
2974; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2975; CM-NEXT:     AND_INT T25.X, T19.X, literal.x,
2976; CM-NEXT:     AND_INT T27.Z, T19.W, literal.x,
2977; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2978; CM-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2979; CM-NEXT:     LSHR T19.X, PV.W, literal.x,
2980; CM-NEXT:     LSHR T27.Y, T19.Z, literal.y,
2981; CM-NEXT:     LSHR * T28.W, T22.Y, literal.y,
2982; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2983; CM-NEXT:     AND_INT T27.X, T19.Z, literal.x,
2984; CM-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
2985; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2986; CM-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2987; CM-NEXT:     LSHR T29.X, PV.W, literal.x,
2988; CM-NEXT:     LSHR T28.Y, T22.X, literal.y,
2989; CM-NEXT:     LSHR * T30.W, T22.W, literal.y,
2990; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2991; CM-NEXT:     AND_INT T28.X, T22.X, literal.x,
2992; CM-NEXT:     AND_INT T30.Z, T22.W, literal.x,
2993; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2994; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2995; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
2996; CM-NEXT:     LSHR T30.Y, T22.Z, literal.y,
2997; CM-NEXT:     LSHR * T31.W, T21.Y, literal.y,
2998; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2999; CM-NEXT:     AND_INT T30.X, T22.Z, literal.x,
3000; CM-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
3001; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3002; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
3003; CM-NEXT:     LSHR T32.X, PV.W, literal.x,
3004; CM-NEXT:     LSHR T31.Y, T21.X, literal.y,
3005; CM-NEXT:     LSHR * T33.W, T21.W, literal.y,
3006; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3007; CM-NEXT:     AND_INT T31.X, T21.X, literal.x,
3008; CM-NEXT:     AND_INT * T33.Z, T21.W, literal.x,
3009; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3010; CM-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
3011; CM-NEXT:     LSHR * T33.Y, T21.Z, literal.y,
3012; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3013; CM-NEXT:     AND_INT T33.X, T21.Z, literal.x,
3014; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3015; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
3016; CM-NEXT:     LSHR * T34.X, PV.W, literal.x,
3017; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3018  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
3019  %ext = zext <32 x i16> %load to <32 x i32>
3020  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
3021  ret void
3022}
3023
3024define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
3025; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i32:
3026; GCN-NOHSA-SI:       ; %bb.0:
3027; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
3028; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3029; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3030; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
3031; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
3032; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3033; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
3034; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
3035; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3036; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3037; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
3038; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
3039; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
3040; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 16, v3
3041; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 16, v2
3042; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v3, 0, 16
3043; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v2, 0, 16
3044; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v23, 16, v1
3045; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 16, v0
3046; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v22, v1, 0, 16
3047; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v0, 0, 16
3048; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(2)
3049; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v7
3050; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v6
3051; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v7, 0, 16
3052; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v6, 0, 16
3053; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v27, 16, v5
3054; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v25, 16, v4
3055; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v26, v5, 0, 16
3056; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v24, v4, 0, 16
3057; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
3058; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v11
3059; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v10
3060; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v11, 0, 16
3061; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v10, 0, 16
3062; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v31, 16, v9
3063; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v29, 16, v8
3064; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v30, v9, 0, 16
3065; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v28, v8, 0, 16
3066; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3067; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v15
3068; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v14
3069; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v15, 0, 16
3070; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v14, 0, 16
3071; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v35, 16, v13
3072; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v33, 16, v12
3073; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v34, v13, 0, 16
3074; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v32, v12, 0, 16
3075; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
3076; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
3077; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
3078; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3079; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
3080; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
3081; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
3082; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3083; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
3084; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
3085; GCN-NOHSA-SI-NEXT:    s_endpgm
3086;
3087; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i32:
3088; GCN-HSA:       ; %bb.0:
3089; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3090; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3091; GCN-HSA-NEXT:    s_add_u32 s4, s2, 48
3092; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3093; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3094; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
3095; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
3096; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
3097; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3098; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
3099; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
3100; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3101; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
3102; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
3103; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
3104; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
3105; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
3106; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
3107; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
3108; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
3109; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
3110; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3111; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
3112; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v19, 16, v1
3113; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 16, v0
3114; GCN-HSA-NEXT:    v_bfe_i32 v18, v1, 0, 16
3115; GCN-HSA-NEXT:    v_bfe_i32 v16, v0, 0, 16
3116; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
3117; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
3118; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
3119; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3120; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
3121; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
3122; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3123; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
3124; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
3125; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
3126; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v19, 16, v3
3127; GCN-HSA-NEXT:    v_bfe_i32 v18, v3, 0, 16
3128; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 16, v2
3129; GCN-HSA-NEXT:    v_bfe_i32 v16, v2, 0, 16
3130; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3131; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[16:19]
3132; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
3133; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
3134; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3135; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3136; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3137; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3138; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
3139; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
3140; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
3141; GCN-HSA-NEXT:    v_bfe_i32 v2, v5, 0, 16
3142; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
3143; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
3144; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3145; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[0:3]
3146; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
3147; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
3148; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3149; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3150; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
3151; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v7
3152; GCN-HSA-NEXT:    v_bfe_i32 v2, v7, 0, 16
3153; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v6
3154; GCN-HSA-NEXT:    v_bfe_i32 v0, v6, 0, 16
3155; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
3156; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v9
3157; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v8
3158; GCN-HSA-NEXT:    v_bfe_i32 v6, v9, 0, 16
3159; GCN-HSA-NEXT:    v_bfe_i32 v4, v8, 0, 16
3160; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
3161; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
3162; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
3163; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
3164; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v11
3165; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v10
3166; GCN-HSA-NEXT:    v_bfe_i32 v2, v11, 0, 16
3167; GCN-HSA-NEXT:    v_bfe_i32 v0, v10, 0, 16
3168; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[4:7]
3169; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[0:3]
3170; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
3171; GCN-HSA-NEXT:    s_waitcnt vmcnt(6)
3172; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v13
3173; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v12
3174; GCN-HSA-NEXT:    v_bfe_i32 v6, v13, 0, 16
3175; GCN-HSA-NEXT:    v_bfe_i32 v4, v12, 0, 16
3176; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
3177; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v15
3178; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v14
3179; GCN-HSA-NEXT:    v_bfe_i32 v2, v15, 0, 16
3180; GCN-HSA-NEXT:    v_bfe_i32 v0, v14, 0, 16
3181; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
3182; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
3183; GCN-HSA-NEXT:    s_endpgm
3184;
3185; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i32:
3186; GCN-NOHSA-VI:       ; %bb.0:
3187; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
3188; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3189; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3190; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
3191; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
3192; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3193; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
3194; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
3195; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3196; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3197; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
3198; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
3199; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
3200; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
3201; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
3202; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 16, v3
3203; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v3, 0, 16
3204; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 16, v2
3205; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3206; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v35, 16, v13
3207; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v33, 16, v12
3208; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v34, v13, 0, 16
3209; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v32, v12, 0, 16
3210; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v2, 0, 16
3211; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v23, 16, v1
3212; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v22, v1, 0, 16
3213; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v21, 16, v0
3214; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v20, v0, 0, 16
3215; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v7
3216; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v7, 0, 16
3217; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v6
3218; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v6, 0, 16
3219; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v27, 16, v5
3220; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v26, v5, 0, 16
3221; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v25, 16, v4
3222; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v24, v4, 0, 16
3223; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 16, v11
3224; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v11, 0, 16
3225; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 16, v10
3226; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v10, 0, 16
3227; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v31, 16, v9
3228; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v30, v9, 0, 16
3229; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v29, 16, v8
3230; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v28, v8, 0, 16
3231; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 16, v15
3232; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 16, v14
3233; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v15, 0, 16
3234; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v14, 0, 16
3235; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
3236; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3237; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
3238; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
3239; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
3240; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3241; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
3242; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
3243; GCN-NOHSA-VI-NEXT:    s_endpgm
3244;
3245; EG-LABEL: global_sextload_v32i16_to_v32i32:
3246; EG:       ; %bb.0:
3247; EG-NEXT:    ALU 9, @20, KC0[CB0:0-32], KC1[]
3248; EG-NEXT:    TEX 3 @12
3249; EG-NEXT:    ALU 73, @30, KC0[CB0:0-32], KC1[]
3250; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T22.X, 0
3251; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
3252; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T28.X, 0
3253; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T27.X, 0
3254; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T26.X, 0
3255; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
3256; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
3257; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
3258; EG-NEXT:    CF_END
3259; EG-NEXT:    Fetch clause starting at 12:
3260; EG-NEXT:     VTX_READ_128 T23.XYZW, T22.X, 16, #1
3261; EG-NEXT:     VTX_READ_128 T24.XYZW, T22.X, 32, #1
3262; EG-NEXT:     VTX_READ_128 T25.XYZW, T22.X, 0, #1
3263; EG-NEXT:     VTX_READ_128 T22.XYZW, T22.X, 48, #1
3264; EG-NEXT:    ALU clause starting at 20:
3265; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3266; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3267; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
3268; EG-NEXT:     LSHR * T20.X, KC0[2].Y, literal.x,
3269; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3270; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3271; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
3272; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
3273; EG-NEXT:     MOV * T22.X, KC0[2].Z,
3274; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3275; EG-NEXT:    ALU clause starting at 30:
3276; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3277; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
3278; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
3279; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3280; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
3281; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
3282; EG-NEXT:     LSHR T0.W, T22.Y, literal.y,
3283; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3284; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3285; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
3286; EG-NEXT:     LSHR T28.X, PS, literal.x,
3287; EG-NEXT:     LSHR T0.Y, T22.W, literal.y,
3288; EG-NEXT:     BFE_INT T29.Z, T25.W, 0.0, literal.y, BS:VEC_120/SCL_212
3289; EG-NEXT:     LSHR T1.W, T24.Y, literal.y,
3290; EG-NEXT:     LSHR * T2.W, T24.W, literal.y,
3291; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3292; EG-NEXT:     BFE_INT T29.X, T25.Z, 0.0, literal.x,
3293; EG-NEXT:     LSHR T1.Y, T23.Y, literal.x,
3294; EG-NEXT:     BFE_INT T30.Z, T25.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3295; EG-NEXT:     LSHR T3.W, T23.W, literal.x,
3296; EG-NEXT:     LSHR * T4.W, T25.W, literal.x,
3297; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3298; EG-NEXT:     BFE_INT T30.X, T25.X, 0.0, literal.x,
3299; EG-NEXT:     LSHR T2.Y, T25.Y, literal.x,
3300; EG-NEXT:     BFE_INT T31.Z, T23.W, 0.0, literal.x,
3301; EG-NEXT:     BFE_INT T29.W, PS, 0.0, literal.x,
3302; EG-NEXT:     LSHR * T4.W, T25.Z, literal.x,
3303; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3304; EG-NEXT:     BFE_INT T31.X, T23.Z, 0.0, literal.x,
3305; EG-NEXT:     BFE_INT T29.Y, PS, 0.0, literal.x,
3306; EG-NEXT:     BFE_INT T25.Z, T23.Y, 0.0, literal.x,
3307; EG-NEXT:     BFE_INT T30.W, PV.Y, 0.0, literal.x,
3308; EG-NEXT:     LSHR * T4.W, T25.X, literal.x,
3309; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3310; EG-NEXT:     BFE_INT T25.X, T23.X, 0.0, literal.x,
3311; EG-NEXT:     BFE_INT T30.Y, PS, 0.0, literal.x,
3312; EG-NEXT:     BFE_INT T32.Z, T24.W, 0.0, literal.x,
3313; EG-NEXT:     BFE_INT T31.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
3314; EG-NEXT:     LSHR * T3.W, T23.Z, literal.x,
3315; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3316; EG-NEXT:     BFE_INT T32.X, T24.Z, 0.0, literal.x,
3317; EG-NEXT:     BFE_INT T31.Y, PS, 0.0, literal.x,
3318; EG-NEXT:     BFE_INT T23.Z, T24.Y, 0.0, literal.x,
3319; EG-NEXT:     BFE_INT T25.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3320; EG-NEXT:     LSHR * T3.W, T23.X, literal.x,
3321; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3322; EG-NEXT:     BFE_INT T23.X, T24.X, 0.0, literal.x,
3323; EG-NEXT:     BFE_INT T25.Y, PS, 0.0, literal.x,
3324; EG-NEXT:     BFE_INT T33.Z, T22.W, 0.0, literal.x,
3325; EG-NEXT:     BFE_INT T32.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
3326; EG-NEXT:     LSHR * T2.W, T24.Z, literal.x,
3327; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3328; EG-NEXT:     BFE_INT T33.X, T22.Z, 0.0, literal.x,
3329; EG-NEXT:     BFE_INT T32.Y, PS, 0.0, literal.x,
3330; EG-NEXT:     BFE_INT T24.Z, T22.Y, 0.0, literal.x,
3331; EG-NEXT:     BFE_INT T23.W, T1.W, 0.0, literal.x,
3332; EG-NEXT:     LSHR * T1.W, T24.X, literal.x,
3333; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3334; EG-NEXT:     BFE_INT T24.X, T22.X, 0.0, literal.x,
3335; EG-NEXT:     BFE_INT T23.Y, PS, 0.0, literal.x,
3336; EG-NEXT:     LSHR T0.Z, T22.Z, literal.x,
3337; EG-NEXT:     BFE_INT T33.W, T0.Y, 0.0, literal.x,
3338; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3339; EG-NEXT:    16(2.242078e-44), 112(1.569454e-43)
3340; EG-NEXT:     LSHR T34.X, PS, literal.x,
3341; EG-NEXT:     BFE_INT T33.Y, PV.Z, 0.0, literal.y,
3342; EG-NEXT:     LSHR T0.Z, T22.X, literal.y,
3343; EG-NEXT:     BFE_INT T24.W, T0.W, 0.0, literal.y,
3344; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3345; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3346; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
3347; EG-NEXT:     LSHR T22.X, PS, literal.x,
3348; EG-NEXT:     BFE_INT * T24.Y, PV.Z, 0.0, literal.y,
3349; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3350;
3351; CM-LABEL: global_sextload_v32i16_to_v32i32:
3352; CM:       ; %bb.0:
3353; CM-NEXT:    ALU 0, @22, KC0[CB0:0-32], KC1[]
3354; CM-NEXT:    TEX 0 @14
3355; CM-NEXT:    ALU 7, @23, KC0[CB0:0-32], KC1[]
3356; CM-NEXT:    TEX 2 @16
3357; CM-NEXT:    ALU 76, @31, KC0[CB0:0-32], KC1[]
3358; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T33, T34.X
3359; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T20.X
3360; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T32, T28.X
3361; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T27.X
3362; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T26.X
3363; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T22, T25.X
3364; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T30, T24.X
3365; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T29, T21.X
3366; CM-NEXT:    CF_END
3367; CM-NEXT:    Fetch clause starting at 14:
3368; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
3369; CM-NEXT:    Fetch clause starting at 16:
3370; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 48, #1
3371; CM-NEXT:     VTX_READ_128 T23.XYZW, T19.X, 32, #1
3372; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
3373; CM-NEXT:    ALU clause starting at 22:
3374; CM-NEXT:     MOV * T19.X, KC0[2].Z,
3375; CM-NEXT:    ALU clause starting at 23:
3376; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3377; CM-NEXT:    96(1.345247e-43), 0(0.000000e+00)
3378; CM-NEXT:     LSHR T21.X, PV.W, literal.x,
3379; CM-NEXT:     LSHR T0.Y, T20.Z, literal.y,
3380; CM-NEXT:     LSHR T0.Z, T20.W, literal.y,
3381; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3382; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3383; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
3384; CM-NEXT:    ALU clause starting at 31:
3385; CM-NEXT:     LSHR T24.X, T0.W, literal.x,
3386; CM-NEXT:     LSHR T1.Y, T20.Y, literal.y,
3387; CM-NEXT:     LSHR T1.Z, T19.Z, literal.y,
3388; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3389; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3390; CM-NEXT:    64(8.968310e-44), 0(0.000000e+00)
3391; CM-NEXT:     LSHR T25.X, PV.W, literal.x,
3392; CM-NEXT:     LSHR T2.Y, T19.W, literal.y,
3393; CM-NEXT:     LSHR T2.Z, T19.X, literal.y,
3394; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3395; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3396; CM-NEXT:    80(1.121039e-43), 0(0.000000e+00)
3397; CM-NEXT:     LSHR T26.X, PV.W, literal.x,
3398; CM-NEXT:     LSHR T3.Y, T19.Y, literal.y,
3399; CM-NEXT:     LSHR T3.Z, T23.Z, literal.y,
3400; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3401; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3402; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
3403; CM-NEXT:     LSHR T27.X, PV.W, literal.x,
3404; CM-NEXT:     LSHR T4.Y, T23.W, literal.y,
3405; CM-NEXT:     LSHR T4.Z, T23.X, literal.y,
3406; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3407; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3408; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
3409; CM-NEXT:     LSHR T28.X, PV.W, literal.x,
3410; CM-NEXT:     LSHR T5.Y, T23.Y, literal.y,
3411; CM-NEXT:     BFE_INT T29.Z, T22.Y, 0.0, literal.y, BS:VEC_120/SCL_212
3412; CM-NEXT:     LSHR * T0.W, T22.Z, literal.y,
3413; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3414; CM-NEXT:     BFE_INT T29.X, T22.X, 0.0, literal.x,
3415; CM-NEXT:     LSHR T6.Y, T22.W, literal.x,
3416; CM-NEXT:     BFE_INT T30.Z, T22.W, 0.0, literal.x,
3417; CM-NEXT:     LSHR * T1.W, T22.Y, literal.x,
3418; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3419; CM-NEXT:     BFE_INT T30.X, T22.Z, 0.0, literal.x,
3420; CM-NEXT:     LSHR T7.Y, T22.X, literal.x,
3421; CM-NEXT:     BFE_INT T22.Z, T23.Y, 0.0, literal.x,
3422; CM-NEXT:     BFE_INT * T29.W, PV.W, 0.0, literal.x,
3423; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3424; CM-NEXT:     BFE_INT T22.X, T23.X, 0.0, literal.x,
3425; CM-NEXT:     BFE_INT T29.Y, PV.Y, 0.0, literal.x,
3426; CM-NEXT:     BFE_INT T31.Z, T23.W, 0.0, literal.x,
3427; CM-NEXT:     BFE_INT * T30.W, T6.Y, 0.0, literal.x,
3428; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3429; CM-NEXT:     BFE_INT T31.X, T23.Z, 0.0, literal.x,
3430; CM-NEXT:     BFE_INT T30.Y, T0.W, 0.0, literal.x,
3431; CM-NEXT:     BFE_INT T23.Z, T19.Y, 0.0, literal.x,
3432; CM-NEXT:     BFE_INT * T22.W, T5.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3433; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3434; CM-NEXT:     BFE_INT T23.X, T19.X, 0.0, literal.x,
3435; CM-NEXT:     BFE_INT T22.Y, T4.Z, 0.0, literal.x,
3436; CM-NEXT:     BFE_INT T32.Z, T19.W, 0.0, literal.x,
3437; CM-NEXT:     BFE_INT * T31.W, T4.Y, 0.0, literal.x,
3438; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3439; CM-NEXT:     BFE_INT T32.X, T19.Z, 0.0, literal.x,
3440; CM-NEXT:     BFE_INT T31.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212
3441; CM-NEXT:     BFE_INT T19.Z, T20.Y, 0.0, literal.x,
3442; CM-NEXT:     BFE_INT * T23.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3443; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3444; CM-NEXT:     BFE_INT T19.X, T20.X, 0.0, literal.x,
3445; CM-NEXT:     BFE_INT T23.Y, T2.Z, 0.0, literal.x,
3446; CM-NEXT:     BFE_INT T33.Z, T20.W, 0.0, literal.x,
3447; CM-NEXT:     BFE_INT * T32.W, T2.Y, 0.0, literal.x,
3448; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3449; CM-NEXT:     BFE_INT T33.X, T20.Z, 0.0, literal.x,
3450; CM-NEXT:     BFE_INT T32.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
3451; CM-NEXT:     LSHR T1.Z, T20.X, literal.x,
3452; CM-NEXT:     BFE_INT * T19.W, T1.Y, 0.0, literal.x,
3453; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3454; CM-NEXT:     LSHR T20.X, KC0[2].Y, literal.x,
3455; CM-NEXT:     BFE_INT T19.Y, PV.Z, 0.0, literal.y,
3456; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.y,
3457; CM-NEXT:     BFE_INT * T33.W, T0.Z, 0.0, literal.y,
3458; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3459; CM-NEXT:     LSHR T34.X, PV.Z, literal.x,
3460; CM-NEXT:     BFE_INT * T33.Y, T0.Y, 0.0, literal.y,
3461; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3462  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
3463  %ext = sext <32 x i16> %load to <32 x i32>
3464  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
3465  ret void
3466}
3467
3468define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
3469; GCN-NOHSA-SI-LABEL: global_zextload_v64i16_to_v64i32:
3470; GCN-NOHSA-SI:       ; %bb.0:
3471; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
3472; GCN-NOHSA-SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
3473; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, -1
3474; GCN-NOHSA-SI-NEXT:    s_mov_b32 s15, 0xe8f000
3475; GCN-NOHSA-SI-NEXT:    s_add_u32 s12, s12, s3
3476; GCN-NOHSA-SI-NEXT:    s_addc_u32 s13, s13, 0
3477; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
3478; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3479; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3480; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, 0xffff
3481; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
3482; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
3483; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3484; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
3485; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
3486; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
3487; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
3488; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
3489; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
3490; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
3491; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
3492; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
3493; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
3494; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(7)
3495; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v35, 16, v3
3496; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v33, 16, v2
3497; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v39, 16, v1
3498; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v37, 16, v0
3499; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(6)
3500; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v43, 16, v7
3501; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v41, 16, v6
3502; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v34, s0, v3
3503; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v32, s0, v2
3504; GCN-NOHSA-SI-NEXT:    buffer_store_dword v32, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
3505; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3506; GCN-NOHSA-SI-NEXT:    buffer_store_dword v33, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
3507; GCN-NOHSA-SI-NEXT:    buffer_store_dword v34, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
3508; GCN-NOHSA-SI-NEXT:    buffer_store_dword v35, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
3509; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v38, s0, v1
3510; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v36, s0, v0
3511; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3512; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v35, 16, v5
3513; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v33, 16, v4
3514; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v42, s0, v7
3515; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v40, s0, v6
3516; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v34, s0, v5
3517; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v32, s0, v4
3518; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v11
3519; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v10
3520; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v47, 16, v9
3521; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v45, 16, v8
3522; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, s0, v11
3523; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s0, v10
3524; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v46, s0, v9
3525; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v44, s0, v8
3526; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v15
3527; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v14
3528; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v51, 16, v13
3529; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v49, 16, v12
3530; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, s0, v15
3531; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, s0, v14
3532; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v50, s0, v13
3533; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v48, s0, v12
3534; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v19
3535; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v18
3536; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v55, 16, v17
3537; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v53, 16, v16
3538; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v14, s0, v19
3539; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, s0, v18
3540; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v54, s0, v17
3541; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v52, s0, v16
3542; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v23
3543; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v22
3544; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v59, 16, v21
3545; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v57, 16, v20
3546; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, s0, v23
3547; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, s0, v22
3548; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v58, s0, v21
3549; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v56, s0, v20
3550; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v23, 16, v27
3551; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v26
3552; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v63, 16, v25
3553; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v61, 16, v24
3554; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v22, s0, v27
3555; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v20, s0, v26
3556; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v62, s0, v25
3557; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v60, s0, v24
3558; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v27, 16, v31
3559; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v30
3560; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v29
3561; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v28
3562; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, s0, v31
3563; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v24, s0, v30
3564; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, s0, v29
3565; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s0, v28
3566; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
3567; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
3568; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3569; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:240
3570; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192
3571; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:208
3572; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160
3573; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3574; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128
3575; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144
3576; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96
3577; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
3578; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64
3579; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
3580; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:32
3581; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:48
3582; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0
3583; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
3584; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
3585; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
3586; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
3587; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
3588; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3589; GCN-NOHSA-SI-NEXT:    s_endpgm
3590;
3591; GCN-HSA-LABEL: global_zextload_v64i16_to_v64i32:
3592; GCN-HSA:       ; %bb.0:
3593; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
3594; GCN-HSA-NEXT:    s_movk_i32 s14, 0x50
3595; GCN-HSA-NEXT:    s_movk_i32 s15, 0x60
3596; GCN-HSA-NEXT:    s_movk_i32 s16, 0x70
3597; GCN-HSA-NEXT:    s_mov_b32 s17, 0xffff
3598; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3599; GCN-HSA-NEXT:    s_add_u32 s4, s2, s14
3600; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3601; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
3602; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
3603; GCN-HSA-NEXT:    s_add_u32 s4, s2, s15
3604; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3605; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
3606; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
3607; GCN-HSA-NEXT:    s_add_u32 s4, s2, s16
3608; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3609; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
3610; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3611; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
3612; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
3613; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
3614; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3615; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s5
3616; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s4
3617; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
3618; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
3619; GCN-HSA-NEXT:    s_add_u32 s6, s2, 48
3620; GCN-HSA-NEXT:    s_addc_u32 s7, s3, 0
3621; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s3
3622; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s2
3623; GCN-HSA-NEXT:    s_add_u32 s2, s2, 64
3624; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
3625; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s5
3626; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s7
3627; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s3
3628; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
3629; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
3630; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s4
3631; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s6
3632; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s2
3633; GCN-HSA-NEXT:    flat_load_dwordx4 v[16:19], v[16:17]
3634; GCN-HSA-NEXT:    flat_load_dwordx4 v[20:23], v[20:21]
3635; GCN-HSA-NEXT:    flat_load_dwordx4 v[28:31], v[28:29]
3636; GCN-HSA-NEXT:    flat_load_dwordx4 v[32:35], v[32:33]
3637; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
3638; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
3639; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
3640; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3641; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xf0
3642; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
3643; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0xc0
3644; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
3645; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0xd0
3646; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
3647; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0xa0
3648; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
3649; GCN-HSA-NEXT:    s_waitcnt vmcnt(7)
3650; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v27, 16, v1
3651; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v25, 16, v0
3652; GCN-HSA-NEXT:    v_and_b32_e32 v26, s17, v1
3653; GCN-HSA-NEXT:    v_and_b32_e32 v24, s17, v0
3654; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
3655; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
3656; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0xb0
3657; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[24:27]
3658; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
3659; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
3660; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v27, 16, v3
3661; GCN-HSA-NEXT:    v_and_b32_e32 v26, s17, v3
3662; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v25, 16, v2
3663; GCN-HSA-NEXT:    v_and_b32_e32 v24, s17, v2
3664; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
3665; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[24:27]
3666; GCN-HSA-NEXT:    s_waitcnt vmcnt(8)
3667; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
3668; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
3669; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v5
3670; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v4
3671; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
3672; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
3673; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3674; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
3675; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v7
3676; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v7
3677; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
3678; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v6
3679; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
3680; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3681; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3682; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3683; GCN-HSA-NEXT:    s_waitcnt vmcnt(9)
3684; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
3685; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v9
3686; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
3687; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v8
3688; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3689; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
3690; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s1
3691; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v11
3692; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v11
3693; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v10
3694; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v10
3695; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
3696; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3697; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
3698; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s5
3699; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s0
3700; GCN-HSA-NEXT:    s_waitcnt vmcnt(6)
3701; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v33
3702; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v32
3703; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v33
3704; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v32
3705; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s4
3706; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v35
3707; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v34
3708; GCN-HSA-NEXT:    v_and_b32_e32 v6, s17, v35
3709; GCN-HSA-NEXT:    v_and_b32_e32 v4, s17, v34
3710; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
3711; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[4:7]
3712; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3713; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3714; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
3715; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
3716; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3717; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3718; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3719; GCN-HSA-NEXT:    s_add_u32 s2, s0, s15
3720; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v29
3721; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v28
3722; GCN-HSA-NEXT:    v_and_b32_e32 v10, s17, v29
3723; GCN-HSA-NEXT:    v_and_b32_e32 v8, s17, v28
3724; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
3725; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v31
3726; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v30
3727; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v31
3728; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v30
3729; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3730; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3731; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
3732; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
3733; GCN-HSA-NEXT:    s_add_u32 s2, s0, s16
3734; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3735; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
3736; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
3737; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
3738; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v21
3739; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v20
3740; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v21
3741; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v20
3742; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3743; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v23
3744; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v22
3745; GCN-HSA-NEXT:    v_and_b32_e32 v6, s17, v23
3746; GCN-HSA-NEXT:    v_and_b32_e32 v4, s17, v22
3747; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3748; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
3749; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v3, 16, v15
3750; GCN-HSA-NEXT:    v_and_b32_e32 v2, s17, v15
3751; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v14
3752; GCN-HSA-NEXT:    v_and_b32_e32 v0, s17, v14
3753; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v13
3754; GCN-HSA-NEXT:    v_and_b32_e32 v6, s17, v13
3755; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v12
3756; GCN-HSA-NEXT:    v_and_b32_e32 v4, s17, v12
3757; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v15, 16, v17
3758; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v16
3759; GCN-HSA-NEXT:    v_and_b32_e32 v14, s17, v17
3760; GCN-HSA-NEXT:    v_and_b32_e32 v12, s17, v16
3761; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
3762; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
3763; GCN-HSA-NEXT:    s_add_u32 s2, s0, s14
3764; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3765; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
3766; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v19
3767; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
3768; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
3769; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
3770; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
3771; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v18
3772; GCN-HSA-NEXT:    v_and_b32_e32 v10, s17, v19
3773; GCN-HSA-NEXT:    v_and_b32_e32 v8, s17, v18
3774; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
3775; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
3776; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
3777; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
3778; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
3779; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
3780; GCN-HSA-NEXT:    s_nop 0
3781; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3782; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3783; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3784; GCN-HSA-NEXT:    s_endpgm
3785;
3786; GCN-NOHSA-VI-LABEL: global_zextload_v64i16_to_v64i32:
3787; GCN-NOHSA-VI:       ; %bb.0:
3788; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
3789; GCN-NOHSA-VI-NEXT:    s_mov_b32 s88, SCRATCH_RSRC_DWORD0
3790; GCN-NOHSA-VI-NEXT:    s_mov_b32 s89, SCRATCH_RSRC_DWORD1
3791; GCN-NOHSA-VI-NEXT:    s_mov_b32 s90, -1
3792; GCN-NOHSA-VI-NEXT:    s_mov_b32 s91, 0xe80000
3793; GCN-NOHSA-VI-NEXT:    s_add_u32 s88, s88, s3
3794; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
3795; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
3796; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
3797; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3798; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
3799; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
3800; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
3801; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:96
3802; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:80
3803; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:64
3804; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0
3805; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16
3806; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32
3807; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:48
3808; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, 0xffff
3809; GCN-NOHSA-VI-NEXT:    s_addc_u32 s89, s89, 0
3810; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
3811; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
3812; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v31, 16, v15
3813; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v30, s0, v15
3814; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v29, 16, v14
3815; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v28, s0, v14
3816; GCN-NOHSA-VI-NEXT:    buffer_store_dword v28, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
3817; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3818; GCN-NOHSA-VI-NEXT:    buffer_store_dword v29, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill
3819; GCN-NOHSA-VI-NEXT:    buffer_store_dword v30, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill
3820; GCN-NOHSA-VI-NEXT:    buffer_store_dword v31, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill
3821; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[56:59], off, s[8:11], 0 offset:112
3822; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v31, 16, v13
3823; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v30, s0, v13
3824; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v29, 16, v12
3825; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v28, s0, v12
3826; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v35, 16, v19
3827; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v34, s0, v19
3828; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v33, 16, v18
3829; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v32, s0, v18
3830; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v19, 16, v17
3831; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v18, s0, v17
3832; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v16
3833; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, s0, v16
3834; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v39, 16, v23
3835; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v38, s0, v23
3836; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v37, 16, v22
3837; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v36, s0, v22
3838; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v23, 16, v21
3839; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v22, s0, v21
3840; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v21, 16, v20
3841; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, s0, v20
3842; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v43, 16, v27
3843; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v42, s0, v27
3844; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v41, 16, v26
3845; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v40, s0, v26
3846; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v27, 16, v25
3847; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v26, s0, v25
3848; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v25, 16, v24
3849; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, s0, v24
3850; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v47, 16, v11
3851; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v46, s0, v11
3852; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v45, 16, v10
3853; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v44, s0, v10
3854; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v9
3855; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v14, s0, v9
3856; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v8
3857; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, s0, v8
3858; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v51, 16, v7
3859; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v50, s0, v7
3860; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v49, 16, v6
3861; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v48, s0, v6
3862; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
3863; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v10, s0, v5
3864; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
3865; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, s0, v4
3866; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v55, 16, v3
3867; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v54, s0, v3
3868; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v53, 16, v2
3869; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v52, s0, v2
3870; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
3871; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v6, s0, v1
3872; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
3873; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, s0, v0
3874; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3875; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v61, 16, v59
3876; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v60, s0, v59
3877; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v59, 16, v58
3878; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v58, s0, v58
3879; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v57
3880; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, s0, v57
3881; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, s0, v56
3882; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v56
3883; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
3884; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
3885; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:240
3886; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
3887; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:208
3888; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
3889; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:176
3890; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
3891; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:144
3892; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96
3893; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:112
3894; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
3895; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:80
3896; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
3897; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:48
3898; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0
3899; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload
3900; GCN-NOHSA-VI-NEXT:    buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload
3901; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload
3902; GCN-NOHSA-VI-NEXT:    buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload
3903; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
3904; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3905; GCN-NOHSA-VI-NEXT:    s_endpgm
3906;
3907; EG-LABEL: global_zextload_v64i16_to_v64i32:
3908; EG:       ; %bb.0:
3909; EG-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
3910; EG-NEXT:    TEX 3 @22
3911; EG-NEXT:    ALU 56, @39, KC0[CB0:0-32], KC1[]
3912; EG-NEXT:    TEX 3 @30
3913; EG-NEXT:    ALU 87, @96, KC0[CB0:0-32], KC1[]
3914; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T66.X, 0
3915; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T65.X, 0
3916; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T64.X, 0
3917; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T62.X, 0
3918; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T61.X, 0
3919; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T59.X, 0
3920; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T58.X, 0
3921; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T56.X, 0
3922; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T55.X, 0
3923; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T53.X, 0
3924; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T48.X, 0
3925; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0
3926; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T46.X, 0
3927; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T44.X, 0
3928; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0
3929; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T41.X, 1
3930; EG-NEXT:    CF_END
3931; EG-NEXT:    Fetch clause starting at 22:
3932; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
3933; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 48, #1
3934; EG-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 32, #1
3935; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 16, #1
3936; EG-NEXT:    Fetch clause starting at 30:
3937; EG-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 112, #1
3938; EG-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 96, #1
3939; EG-NEXT:     VTX_READ_128 T51.XYZW, T35.X, 80, #1
3940; EG-NEXT:     VTX_READ_128 T52.XYZW, T35.X, 64, #1
3941; EG-NEXT:    ALU clause starting at 38:
3942; EG-NEXT:     MOV * T35.X, KC0[2].Z,
3943; EG-NEXT:    ALU clause starting at 39:
3944; EG-NEXT:     LSHR * T40.W, T36.W, literal.x,
3945; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3946; EG-NEXT:     AND_INT * T40.Z, T36.W, literal.x,
3947; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3948; EG-NEXT:     LSHR T40.Y, T36.Z, literal.x,
3949; EG-NEXT:     LSHR * T36.W, T36.Y, literal.x,
3950; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3951; EG-NEXT:     AND_INT T40.X, T36.Z, literal.x,
3952; EG-NEXT:     AND_INT T36.Z, T36.Y, literal.x,
3953; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3954; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
3955; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
3956; EG-NEXT:     LSHR T36.Y, T36.X, literal.y,
3957; EG-NEXT:     LSHR T42.W, T39.W, literal.y,
3958; EG-NEXT:     AND_INT * T36.X, T36.X, literal.z,
3959; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3960; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3961; EG-NEXT:     AND_INT * T42.Z, T39.W, literal.x,
3962; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3963; EG-NEXT:     LSHR T43.X, KC0[2].Y, literal.x,
3964; EG-NEXT:     LSHR T42.Y, T39.Z, literal.y,
3965; EG-NEXT:     LSHR T39.W, T39.Y, literal.y,
3966; EG-NEXT:     AND_INT * T42.X, T39.Z, literal.z,
3967; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3968; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3969; EG-NEXT:     AND_INT T39.Z, T39.Y, literal.x,
3970; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3971; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
3972; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
3973; EG-NEXT:     LSHR T39.Y, T39.X, literal.y,
3974; EG-NEXT:     LSHR T45.W, T38.W, literal.y,
3975; EG-NEXT:     AND_INT * T39.X, T39.X, literal.z,
3976; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3977; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3978; EG-NEXT:     AND_INT T45.Z, T38.W, literal.x,
3979; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3980; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
3981; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
3982; EG-NEXT:     LSHR T45.Y, T38.Z, literal.y,
3983; EG-NEXT:     LSHR T38.W, T38.Y, literal.y,
3984; EG-NEXT:     AND_INT * T45.X, T38.Z, literal.z,
3985; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3986; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3987; EG-NEXT:     AND_INT T38.Z, T38.Y, literal.x,
3988; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3989; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
3990; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
3991; EG-NEXT:     LSHR T38.Y, T38.X, literal.y,
3992; EG-NEXT:     AND_INT * T38.X, T38.X, literal.z,
3993; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3994; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
3995; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
3996; EG-NEXT:     LSHR * T35.W, T37.W, literal.y,
3997; EG-NEXT:    64(8.968310e-44), 16(2.242078e-44)
3998; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
3999; EG-NEXT:     AND_INT * T35.Z, T37.W, literal.y,
4000; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
4001; EG-NEXT:    ALU clause starting at 96:
4002; EG-NEXT:     LSHR T35.Y, T37.Z, literal.x,
4003; EG-NEXT:     LSHR * T37.W, T37.Y, literal.x,
4004; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4005; EG-NEXT:     AND_INT T35.X, T37.Z, literal.x,
4006; EG-NEXT:     AND_INT T37.Z, T37.Y, literal.x,
4007; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4008; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
4009; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
4010; EG-NEXT:     LSHR T37.Y, T37.X, literal.y,
4011; EG-NEXT:     LSHR T54.W, T52.W, literal.y,
4012; EG-NEXT:     AND_INT * T37.X, T37.X, literal.z,
4013; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4014; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4015; EG-NEXT:     AND_INT T54.Z, T52.W, literal.x,
4016; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4017; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
4018; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
4019; EG-NEXT:     LSHR T54.Y, T52.Z, literal.y,
4020; EG-NEXT:     LSHR T52.W, T52.Y, literal.y,
4021; EG-NEXT:     AND_INT * T54.X, T52.Z, literal.z,
4022; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4023; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4024; EG-NEXT:     AND_INT T52.Z, T52.Y, literal.x,
4025; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4026; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
4027; EG-NEXT:     LSHR T56.X, PV.W, literal.x,
4028; EG-NEXT:     LSHR T52.Y, T52.X, literal.y,
4029; EG-NEXT:     LSHR T57.W, T51.W, literal.y,
4030; EG-NEXT:     AND_INT * T52.X, T52.X, literal.z,
4031; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4032; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4033; EG-NEXT:     AND_INT T57.Z, T51.W, literal.x,
4034; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4035; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
4036; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
4037; EG-NEXT:     LSHR T57.Y, T51.Z, literal.y,
4038; EG-NEXT:     LSHR T51.W, T51.Y, literal.y,
4039; EG-NEXT:     AND_INT * T57.X, T51.Z, literal.z,
4040; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4041; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4042; EG-NEXT:     AND_INT T51.Z, T51.Y, literal.x,
4043; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4044; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
4045; EG-NEXT:     LSHR T59.X, PV.W, literal.x,
4046; EG-NEXT:     LSHR T51.Y, T51.X, literal.y,
4047; EG-NEXT:     LSHR T60.W, T50.W, literal.y,
4048; EG-NEXT:     AND_INT * T51.X, T51.X, literal.z,
4049; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4050; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4051; EG-NEXT:     AND_INT T60.Z, T50.W, literal.x,
4052; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4053; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
4054; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
4055; EG-NEXT:     LSHR T60.Y, T50.Z, literal.y,
4056; EG-NEXT:     LSHR T50.W, T50.Y, literal.y,
4057; EG-NEXT:     AND_INT * T60.X, T50.Z, literal.z,
4058; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4059; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4060; EG-NEXT:     AND_INT T50.Z, T50.Y, literal.x,
4061; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4062; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
4063; EG-NEXT:     LSHR T62.X, PV.W, literal.x,
4064; EG-NEXT:     LSHR T50.Y, T50.X, literal.y,
4065; EG-NEXT:     LSHR T63.W, T49.W, literal.y,
4066; EG-NEXT:     AND_INT * T50.X, T50.X, literal.z,
4067; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4068; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4069; EG-NEXT:     AND_INT T63.Z, T49.W, literal.x,
4070; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4071; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
4072; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
4073; EG-NEXT:     LSHR T63.Y, T49.Z, literal.y,
4074; EG-NEXT:     LSHR T49.W, T49.Y, literal.y,
4075; EG-NEXT:     AND_INT * T63.X, T49.Z, literal.z,
4076; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4077; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4078; EG-NEXT:     AND_INT T49.Z, T49.Y, literal.x,
4079; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4080; EG-NEXT:    65535(9.183409e-41), 240(3.363116e-43)
4081; EG-NEXT:     LSHR T65.X, PV.W, literal.x,
4082; EG-NEXT:     LSHR T49.Y, T49.X, literal.y,
4083; EG-NEXT:     AND_INT * T49.X, T49.X, literal.z,
4084; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4085; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4086; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4087; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
4088; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,
4089; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4090;
4091; CM-LABEL: global_zextload_v64i16_to_v64i32:
4092; CM:       ; %bb.0:
4093; CM-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
4094; CM-NEXT:    TEX 3 @22
4095; CM-NEXT:    ALU 50, @39, KC0[CB0:0-32], KC1[]
4096; CM-NEXT:    TEX 3 @30
4097; CM-NEXT:    ALU 78, @90, KC0[CB0:0-32], KC1[]
4098; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T65, T66.X
4099; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T63, T48.X
4100; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T62, T64.X
4101; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T60, T49.X
4102; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T59, T61.X
4103; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T57, T50.X
4104; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T56, T58.X
4105; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T54, T51.X
4106; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T53, T55.X
4107; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T37.X
4108; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T47, T52.X
4109; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T38.X
4110; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T44, T46.X
4111; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T39.X
4112; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T41, T43.X
4113; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T40, T36.X
4114; CM-NEXT:    CF_END
4115; CM-NEXT:    Fetch clause starting at 22:
4116; CM-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 112, #1
4117; CM-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 64, #1
4118; CM-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 80, #1
4119; CM-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 96, #1
4120; CM-NEXT:    Fetch clause starting at 30:
4121; CM-NEXT:     VTX_READ_128 T48.XYZW, T35.X, 0, #1
4122; CM-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 16, #1
4123; CM-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 32, #1
4124; CM-NEXT:     VTX_READ_128 T51.XYZW, T35.X, 48, #1
4125; CM-NEXT:    ALU clause starting at 38:
4126; CM-NEXT:     MOV * T35.X, KC0[2].Z,
4127; CM-NEXT:    ALU clause starting at 39:
4128; CM-NEXT:     LSHR * T40.W, T36.Y, literal.x,
4129; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4130; CM-NEXT:     AND_INT * T40.Z, T36.Y, literal.x,
4131; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4132; CM-NEXT:     LSHR T40.Y, T36.X, literal.x,
4133; CM-NEXT:     LSHR * T41.W, T36.W, literal.x,
4134; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4135; CM-NEXT:     AND_INT T40.X, T36.X, literal.x,
4136; CM-NEXT:     AND_INT T41.Z, T36.W, literal.x,
4137; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4138; CM-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
4139; CM-NEXT:     LSHR T36.X, PV.W, literal.x,
4140; CM-NEXT:     LSHR T41.Y, T36.Z, literal.y,
4141; CM-NEXT:     LSHR * T42.W, T39.Y, literal.y,
4142; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4143; CM-NEXT:     AND_INT T41.X, T36.Z, literal.x,
4144; CM-NEXT:     AND_INT T42.Z, T39.Y, literal.x,
4145; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4146; CM-NEXT:    65535(9.183409e-41), 240(3.363116e-43)
4147; CM-NEXT:     LSHR T43.X, PV.W, literal.x,
4148; CM-NEXT:     LSHR T42.Y, T39.X, literal.y,
4149; CM-NEXT:     LSHR * T44.W, T39.W, literal.y,
4150; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4151; CM-NEXT:     AND_INT T42.X, T39.X, literal.x,
4152; CM-NEXT:     AND_INT T44.Z, T39.W, literal.x,
4153; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4154; CM-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
4155; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
4156; CM-NEXT:     LSHR T44.Y, T39.Z, literal.y,
4157; CM-NEXT:     LSHR * T45.W, T38.Y, literal.y,
4158; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4159; CM-NEXT:     AND_INT T44.X, T39.Z, literal.x,
4160; CM-NEXT:     AND_INT T45.Z, T38.Y, literal.x,
4161; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4162; CM-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
4163; CM-NEXT:     LSHR T46.X, PV.W, literal.x,
4164; CM-NEXT:     LSHR T45.Y, T38.X, literal.y,
4165; CM-NEXT:     LSHR * T47.W, T38.W, literal.y,
4166; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4167; CM-NEXT:     AND_INT T45.X, T38.X, literal.x,
4168; CM-NEXT:     AND_INT T47.Z, T38.W, literal.x,
4169; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4170; CM-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
4171; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
4172; CM-NEXT:     LSHR T47.Y, T38.Z, literal.y,
4173; CM-NEXT:     LSHR * T35.W, T37.Y, literal.y,
4174; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4175; CM-NEXT:     AND_INT T47.X, T38.Z, literal.x,
4176; CM-NEXT:     AND_INT T35.Z, T37.Y, literal.x,
4177; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4178; CM-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
4179; CM-NEXT:    ALU clause starting at 90:
4180; CM-NEXT:     LSHR T52.X, T0.W, literal.x,
4181; CM-NEXT:     LSHR T35.Y, T37.X, literal.y,
4182; CM-NEXT:     LSHR * T53.W, T37.W, literal.y, BS:VEC_120/SCL_212
4183; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4184; CM-NEXT:     AND_INT T35.X, T37.X, literal.x,
4185; CM-NEXT:     AND_INT T53.Z, T37.W, literal.x,
4186; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4187; CM-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
4188; CM-NEXT:     LSHR T37.X, PV.W, literal.x,
4189; CM-NEXT:     LSHR T53.Y, T37.Z, literal.y,
4190; CM-NEXT:     LSHR * T54.W, T51.Y, literal.y,
4191; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4192; CM-NEXT:     AND_INT T53.X, T37.Z, literal.x,
4193; CM-NEXT:     AND_INT T54.Z, T51.Y, literal.x,
4194; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4195; CM-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
4196; CM-NEXT:     LSHR T55.X, PV.W, literal.x,
4197; CM-NEXT:     LSHR T54.Y, T51.X, literal.y,
4198; CM-NEXT:     LSHR * T56.W, T51.W, literal.y,
4199; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4200; CM-NEXT:     AND_INT T54.X, T51.X, literal.x,
4201; CM-NEXT:     AND_INT T56.Z, T51.W, literal.x,
4202; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4203; CM-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
4204; CM-NEXT:     LSHR T51.X, PV.W, literal.x,
4205; CM-NEXT:     LSHR T56.Y, T51.Z, literal.y,
4206; CM-NEXT:     LSHR * T57.W, T50.Y, literal.y,
4207; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4208; CM-NEXT:     AND_INT T56.X, T51.Z, literal.x,
4209; CM-NEXT:     AND_INT T57.Z, T50.Y, literal.x,
4210; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4211; CM-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
4212; CM-NEXT:     LSHR T58.X, PV.W, literal.x,
4213; CM-NEXT:     LSHR T57.Y, T50.X, literal.y,
4214; CM-NEXT:     LSHR * T59.W, T50.W, literal.y,
4215; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4216; CM-NEXT:     AND_INT T57.X, T50.X, literal.x,
4217; CM-NEXT:     AND_INT T59.Z, T50.W, literal.x,
4218; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4219; CM-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
4220; CM-NEXT:     LSHR T50.X, PV.W, literal.x,
4221; CM-NEXT:     LSHR T59.Y, T50.Z, literal.y,
4222; CM-NEXT:     LSHR * T60.W, T49.Y, literal.y,
4223; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4224; CM-NEXT:     AND_INT T59.X, T50.Z, literal.x,
4225; CM-NEXT:     AND_INT T60.Z, T49.Y, literal.x,
4226; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4227; CM-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
4228; CM-NEXT:     LSHR T61.X, PV.W, literal.x,
4229; CM-NEXT:     LSHR T60.Y, T49.X, literal.y,
4230; CM-NEXT:     LSHR * T62.W, T49.W, literal.y,
4231; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4232; CM-NEXT:     AND_INT T60.X, T49.X, literal.x,
4233; CM-NEXT:     AND_INT T62.Z, T49.W, literal.x,
4234; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4235; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
4236; CM-NEXT:     LSHR T49.X, PV.W, literal.x,
4237; CM-NEXT:     LSHR T62.Y, T49.Z, literal.y,
4238; CM-NEXT:     LSHR * T63.W, T48.Y, literal.y,
4239; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4240; CM-NEXT:     AND_INT T62.X, T49.Z, literal.x,
4241; CM-NEXT:     AND_INT T63.Z, T48.Y, literal.x,
4242; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4243; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
4244; CM-NEXT:     LSHR T64.X, PV.W, literal.x,
4245; CM-NEXT:     LSHR T63.Y, T48.X, literal.y,
4246; CM-NEXT:     LSHR * T65.W, T48.W, literal.y,
4247; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4248; CM-NEXT:     AND_INT T63.X, T48.X, literal.x,
4249; CM-NEXT:     AND_INT * T65.Z, T48.W, literal.x,
4250; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4251; CM-NEXT:     LSHR T48.X, KC0[2].Y, literal.x,
4252; CM-NEXT:     LSHR * T65.Y, T48.Z, literal.y,
4253; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4254; CM-NEXT:     AND_INT T65.X, T48.Z, literal.x,
4255; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4256; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
4257; CM-NEXT:     LSHR * T66.X, PV.W, literal.x,
4258; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4259  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
4260  %ext = zext <64 x i16> %load to <64 x i32>
4261  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
4262  ret void
4263}
4264
4265define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
4266; GCN-NOHSA-SI-LABEL: global_sextload_v64i16_to_v64i32:
4267; GCN-NOHSA-SI:       ; %bb.0:
4268; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
4269; GCN-NOHSA-SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
4270; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, -1
4271; GCN-NOHSA-SI-NEXT:    s_mov_b32 s15, 0xe8f000
4272; GCN-NOHSA-SI-NEXT:    s_add_u32 s12, s12, s3
4273; GCN-NOHSA-SI-NEXT:    s_addc_u32 s13, s13, 0
4274; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
4275; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4276; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4277; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
4278; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
4279; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4280; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
4281; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
4282; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[32:35], off, s[8:11], 0 offset:112
4283; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:96
4284; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:80
4285; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:64
4286; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0
4287; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:16
4288; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:32
4289; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:48
4290; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
4291; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v19
4292; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v18
4293; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v19, 0, 16
4294; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v18, 0, 16
4295; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
4296; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4297; GCN-NOHSA-SI-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
4298; GCN-NOHSA-SI-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
4299; GCN-NOHSA-SI-NEXT:    buffer_store_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
4300; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v39, 16, v17
4301; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v37, 16, v16
4302; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v38, v17, 0, 16
4303; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v36, v16, 0, 16
4304; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 16, v23
4305; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 16, v22
4306; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v23, 0, 16
4307; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v22, 0, 16
4308; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v43, 16, v21
4309; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v41, 16, v20
4310; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v42, v21, 0, 16
4311; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v40, v20, 0, 16
4312; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v23, 16, v27
4313; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 16, v26
4314; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v22, v27, 0, 16
4315; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v26, 0, 16
4316; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v47, 16, v25
4317; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v45, 16, v24
4318; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v46, v25, 0, 16
4319; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v44, v24, 0, 16
4320; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v27, 16, v31
4321; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v25, 16, v30
4322; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v26, v31, 0, 16
4323; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v24, v30, 0, 16
4324; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v51, 16, v29
4325; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v49, 16, v28
4326; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v50, v29, 0, 16
4327; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v48, v28, 0, 16
4328; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v31, 16, v15
4329; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v29, 16, v14
4330; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v30, v15, 0, 16
4331; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v28, v14, 0, 16
4332; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v55, 16, v13
4333; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v53, 16, v12
4334; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v54, v13, 0, 16
4335; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v52, v12, 0, 16
4336; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v15, 16, v11
4337; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 16, v10
4338; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v14, v11, 0, 16
4339; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v10, 0, 16
4340; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v59, 16, v9
4341; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v57, 16, v8
4342; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v58, v9, 0, 16
4343; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v56, v8, 0, 16
4344; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v7
4345; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v6
4346; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v7, 0, 16
4347; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v6, 0, 16
4348; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v63, 16, v5
4349; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v61, 16, v4
4350; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v62, v5, 0, 16
4351; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v60, v4, 0, 16
4352; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v35
4353; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v34
4354; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v35, 0, 16
4355; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v34, 0, 16
4356; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4357; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v33
4358; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v32
4359; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v33, 0, 16
4360; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v32, 0, 16
4361; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
4362; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
4363; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4364; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240
4365; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192
4366; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
4367; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160
4368; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176
4369; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128
4370; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:144
4371; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96
4372; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112
4373; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64
4374; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
4375; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:32
4376; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
4377; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0
4378; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
4379; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
4380; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
4381; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
4382; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
4383; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4384; GCN-NOHSA-SI-NEXT:    s_endpgm
4385;
4386; GCN-HSA-LABEL: global_sextload_v64i16_to_v64i32:
4387; GCN-HSA:       ; %bb.0:
4388; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
4389; GCN-HSA-NEXT:    s_movk_i32 s8, 0x70
4390; GCN-HSA-NEXT:    s_movk_i32 s9, 0x60
4391; GCN-HSA-NEXT:    s_movk_i32 s10, 0x50
4392; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4393; GCN-HSA-NEXT:    s_add_u32 s4, s2, s8
4394; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4395; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
4396; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
4397; GCN-HSA-NEXT:    s_add_u32 s4, s2, s9
4398; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4399; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
4400; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
4401; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4402; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
4403; GCN-HSA-NEXT:    s_add_u32 s4, s2, s10
4404; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4405; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4406; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
4407; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
4408; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4409; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
4410; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
4411; GCN-HSA-NEXT:    s_add_u32 s4, s2, 64
4412; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4413; GCN-HSA-NEXT:    s_add_u32 s6, s2, 48
4414; GCN-HSA-NEXT:    s_addc_u32 s7, s3, 0
4415; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s5
4416; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s7
4417; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s4
4418; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s6
4419; GCN-HSA-NEXT:    flat_load_dwordx4 v[16:19], v[16:17]
4420; GCN-HSA-NEXT:    flat_load_dwordx4 v[20:23], v[20:21]
4421; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
4422; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
4423; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
4424; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
4425; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s5
4426; GCN-HSA-NEXT:    v_mov_b32_e32 v33, s3
4427; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s4
4428; GCN-HSA-NEXT:    v_mov_b32_e32 v32, s2
4429; GCN-HSA-NEXT:    flat_load_dwordx4 v[28:31], v[28:29]
4430; GCN-HSA-NEXT:    flat_load_dwordx4 v[32:35], v[32:33]
4431; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
4432; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
4433; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
4434; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4435; GCN-HSA-NEXT:    s_waitcnt vmcnt(7)
4436; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v27, 16, v1
4437; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v25, 16, v0
4438; GCN-HSA-NEXT:    v_bfe_i32 v26, v1, 0, 16
4439; GCN-HSA-NEXT:    v_bfe_i32 v24, v0, 0, 16
4440; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4441; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4442; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
4443; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[24:27]
4444; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4445; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4446; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4447; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
4448; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v27, 16, v3
4449; GCN-HSA-NEXT:    v_bfe_i32 v26, v3, 0, 16
4450; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v25, 16, v2
4451; GCN-HSA-NEXT:    v_bfe_i32 v24, v2, 0, 16
4452; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4453; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[24:27]
4454; GCN-HSA-NEXT:    s_waitcnt vmcnt(8)
4455; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v5
4456; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v4
4457; GCN-HSA-NEXT:    v_bfe_i32 v2, v5, 0, 16
4458; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
4459; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4460; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4461; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xd0
4462; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4463; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4464; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4465; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4466; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v7
4467; GCN-HSA-NEXT:    v_bfe_i32 v2, v7, 0, 16
4468; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v6
4469; GCN-HSA-NEXT:    v_bfe_i32 v0, v6, 0, 16
4470; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4471; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
4472; GCN-HSA-NEXT:    s_waitcnt vmcnt(8)
4473; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v13
4474; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v12
4475; GCN-HSA-NEXT:    v_bfe_i32 v2, v13, 0, 16
4476; GCN-HSA-NEXT:    v_bfe_i32 v0, v12, 0, 16
4477; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
4478; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4479; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
4480; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
4481; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s3
4482; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v9
4483; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v8
4484; GCN-HSA-NEXT:    v_bfe_i32 v2, v9, 0, 16
4485; GCN-HSA-NEXT:    v_bfe_i32 v0, v8, 0, 16
4486; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
4487; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s2
4488; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xb0
4489; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v15
4490; GCN-HSA-NEXT:    v_bfe_i32 v6, v15, 0, 16
4491; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v14
4492; GCN-HSA-NEXT:    v_bfe_i32 v4, v14, 0, 16
4493; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
4494; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
4495; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
4496; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4497; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
4498; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
4499; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
4500; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4501; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4502; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4503; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x90
4504; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v14, 16, v11
4505; GCN-HSA-NEXT:    v_bfe_i32 v13, v11, 0, 16
4506; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v12, 16, v10
4507; GCN-HSA-NEXT:    v_bfe_i32 v11, v10, 0, 16
4508; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[11:14]
4509; GCN-HSA-NEXT:    s_waitcnt vmcnt(11)
4510; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v17
4511; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v16
4512; GCN-HSA-NEXT:    v_bfe_i32 v2, v17, 0, 16
4513; GCN-HSA-NEXT:    v_bfe_i32 v0, v16, 0, 16
4514; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4515; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4516; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4517; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4518; GCN-HSA-NEXT:    s_add_u32 s2, s0, s9
4519; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v19
4520; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v18
4521; GCN-HSA-NEXT:    v_bfe_i32 v2, v19, 0, 16
4522; GCN-HSA-NEXT:    v_bfe_i32 v0, v18, 0, 16
4523; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4524; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4525; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
4526; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
4527; GCN-HSA-NEXT:    s_add_u32 s2, s0, s8
4528; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4529; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4530; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4531; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
4532; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4533; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
4534; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
4535; GCN-HSA-NEXT:    s_add_u32 s2, s0, s10
4536; GCN-HSA-NEXT:    s_waitcnt vmcnt(12)
4537; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v23
4538; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v22
4539; GCN-HSA-NEXT:    v_bfe_i32 v10, v23, 0, 16
4540; GCN-HSA-NEXT:    v_bfe_i32 v8, v22, 0, 16
4541; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
4542; GCN-HSA-NEXT:    s_waitcnt vmcnt(12)
4543; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v15, 16, v29
4544; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v13, 16, v28
4545; GCN-HSA-NEXT:    v_bfe_i32 v14, v29, 0, 16
4546; GCN-HSA-NEXT:    v_bfe_i32 v12, v28, 0, 16
4547; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4548; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
4549; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 16, v31
4550; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4551; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4552; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
4553; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
4554; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 16, v30
4555; GCN-HSA-NEXT:    v_bfe_i32 v10, v31, 0, 16
4556; GCN-HSA-NEXT:    v_bfe_i32 v8, v30, 0, 16
4557; GCN-HSA-NEXT:    s_add_u32 s0, s0, 48
4558; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
4559; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v21
4560; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
4561; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v20
4562; GCN-HSA-NEXT:    v_bfe_i32 v2, v21, 0, 16
4563; GCN-HSA-NEXT:    v_bfe_i32 v0, v20, 0, 16
4564; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4565; GCN-HSA-NEXT:    s_waitcnt vmcnt(14)
4566; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 16, v33
4567; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 16, v32
4568; GCN-HSA-NEXT:    v_bfe_i32 v6, v33, 0, 16
4569; GCN-HSA-NEXT:    v_bfe_i32 v4, v32, 0, 16
4570; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
4571; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
4572; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
4573; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 16, v35
4574; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4575; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 16, v34
4576; GCN-HSA-NEXT:    v_bfe_i32 v2, v35, 0, 16
4577; GCN-HSA-NEXT:    v_bfe_i32 v0, v34, 0, 16
4578; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4579; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4580; GCN-HSA-NEXT:    s_endpgm
4581;
4582; GCN-NOHSA-VI-LABEL: global_sextload_v64i16_to_v64i32:
4583; GCN-NOHSA-VI:       ; %bb.0:
4584; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
4585; GCN-NOHSA-VI-NEXT:    s_mov_b32 s88, SCRATCH_RSRC_DWORD0
4586; GCN-NOHSA-VI-NEXT:    s_mov_b32 s89, SCRATCH_RSRC_DWORD1
4587; GCN-NOHSA-VI-NEXT:    s_mov_b32 s90, -1
4588; GCN-NOHSA-VI-NEXT:    s_mov_b32 s91, 0xe80000
4589; GCN-NOHSA-VI-NEXT:    s_add_u32 s88, s88, s3
4590; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
4591; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
4592; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
4593; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4594; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
4595; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
4596; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
4597; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:96
4598; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:80
4599; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:64
4600; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0
4601; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16
4602; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32
4603; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:48
4604; GCN-NOHSA-VI-NEXT:    s_addc_u32 s89, s89, 0
4605; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
4606; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
4607; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
4608; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v59, 16, v1
4609; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v57, 16, v0
4610; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
4611; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v51, 16, v9
4612; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
4613; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v31, 16, v15
4614; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v30, v15, 0, 16
4615; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v29, 16, v14
4616; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v28, v14, 0, 16
4617; GCN-NOHSA-VI-NEXT:    buffer_store_dword v28, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
4618; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4619; GCN-NOHSA-VI-NEXT:    buffer_store_dword v29, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill
4620; GCN-NOHSA-VI-NEXT:    buffer_store_dword v30, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill
4621; GCN-NOHSA-VI-NEXT:    buffer_store_dword v31, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill
4622; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[60:63], off, s[8:11], 0 offset:112
4623; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v50, v9, 0, 16
4624; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v58, v1, 0, 16
4625; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v56, v0, 0, 16
4626; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v31, 16, v19
4627; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v30, v19, 0, 16
4628; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v29, 16, v18
4629; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v28, v18, 0, 16
4630; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v39, 16, v17
4631; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v38, v17, 0, 16
4632; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v37, 16, v16
4633; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v36, v16, 0, 16
4634; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 16, v23
4635; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v23, 0, 16
4636; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 16, v22
4637; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v22, 0, 16
4638; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v43, 16, v21
4639; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v42, v21, 0, 16
4640; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v41, 16, v20
4641; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v40, v20, 0, 16
4642; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v23, 16, v27
4643; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v22, v27, 0, 16
4644; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v21, 16, v26
4645; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v20, v26, 0, 16
4646; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v47, 16, v25
4647; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v46, v25, 0, 16
4648; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v45, 16, v24
4649; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v44, v24, 0, 16
4650; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v27, 16, v11
4651; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v26, v11, 0, 16
4652; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v25, 16, v10
4653; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v24, v10, 0, 16
4654; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v49, 16, v8
4655; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v48, v8, 0, 16
4656; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 16, v3
4657; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v3, 0, 16
4658; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 16, v2
4659; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v2, 0, 16
4660; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v35, 16, v13
4661; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v34, v13, 0, 16
4662; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v33, 16, v12
4663; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v32, v12, 0, 16
4664; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 16, v7
4665; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v7, 0, 16
4666; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 16, v6
4667; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v6, 0, 16
4668; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v55, 16, v5
4669; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v54, v5, 0, 16
4670; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v53, 16, v4
4671; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v52, v4, 0, 16
4672; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4673; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 16, v61
4674; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 16, v60
4675; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v61, 0, 16
4676; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v60, 0, 16
4677; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 16, v63
4678; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 16, v62
4679; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v63, 0, 16
4680; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v62, 0, 16
4681; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
4682; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240
4683; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:192
4684; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
4685; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:160
4686; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176
4687; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:128
4688; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:144
4689; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:96
4690; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:112
4691; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:64
4692; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
4693; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:32
4694; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:48
4695; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0
4696; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload
4697; GCN-NOHSA-VI-NEXT:    buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload
4698; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload
4699; GCN-NOHSA-VI-NEXT:    buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload
4700; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
4701; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4702; GCN-NOHSA-VI-NEXT:    s_endpgm
4703;
4704; EG-LABEL: global_sextload_v64i16_to_v64i32:
4705; EG:       ; %bb.0:
4706; EG-NEXT:    ALU 18, @38, KC0[CB0:0-32], KC1[]
4707; EG-NEXT:    TEX 7 @22
4708; EG-NEXT:    ALU 75, @57, KC0[CB0:0-32], KC1[]
4709; EG-NEXT:    ALU 71, @133, KC0[CB0:0-32], KC1[]
4710; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T41.X, 0
4711; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
4712; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T56.X, 0
4713; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T55.X, 0
4714; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T54.X, 0
4715; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T53.X, 0
4716; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T52.X, 0
4717; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T51.X, 0
4718; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T50.X, 0
4719; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T49.X, 0
4720; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T40.X, 0
4721; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T39.X, 0
4722; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T38.X, 0
4723; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
4724; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
4725; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
4726; EG-NEXT:    CF_END
4727; EG-NEXT:    PAD
4728; EG-NEXT:    Fetch clause starting at 22:
4729; EG-NEXT:     VTX_READ_128 T42.XYZW, T41.X, 16, #1
4730; EG-NEXT:     VTX_READ_128 T43.XYZW, T41.X, 32, #1
4731; EG-NEXT:     VTX_READ_128 T44.XYZW, T41.X, 0, #1
4732; EG-NEXT:     VTX_READ_128 T45.XYZW, T41.X, 48, #1
4733; EG-NEXT:     VTX_READ_128 T46.XYZW, T41.X, 64, #1
4734; EG-NEXT:     VTX_READ_128 T47.XYZW, T41.X, 80, #1
4735; EG-NEXT:     VTX_READ_128 T48.XYZW, T41.X, 96, #1
4736; EG-NEXT:     VTX_READ_128 T41.XYZW, T41.X, 112, #1
4737; EG-NEXT:    ALU clause starting at 38:
4738; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4739; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4740; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
4741; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.x,
4742; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4743; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4744; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
4745; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
4746; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4747; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
4748; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
4749; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4750; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
4751; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
4752; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4753; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
4754; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
4755; EG-NEXT:     MOV * T41.X, KC0[2].Z,
4756; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4757; EG-NEXT:    ALU clause starting at 57:
4758; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4759; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
4760; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
4761; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4762; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
4763; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
4764; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4765; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
4766; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
4767; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4768; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
4769; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
4770; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4771; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
4772; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
4773; EG-NEXT:     LSHR T0.Y, T41.Y, literal.y,
4774; EG-NEXT:     LSHR T0.Z, T41.W, literal.y,
4775; EG-NEXT:     LSHR T0.W, T48.Y, literal.y, BS:VEC_120/SCL_212
4776; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4777; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4778; EG-NEXT:    160(2.242078e-43), 0(0.000000e+00)
4779; EG-NEXT:     LSHR T54.X, PS, literal.x,
4780; EG-NEXT:     LSHR T1.Y, T48.W, literal.y,
4781; EG-NEXT:     LSHR T1.Z, T47.Y, literal.y,
4782; EG-NEXT:     LSHR T1.W, T47.W, literal.y, BS:VEC_120/SCL_212
4783; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.z,
4784; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4785; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
4786; EG-NEXT:     LSHR T55.X, PS, literal.x,
4787; EG-NEXT:     LSHR T2.Y, T46.Y, literal.y,
4788; EG-NEXT:     LSHR T2.Z, T46.W, literal.y,
4789; EG-NEXT:     LSHR T2.W, T45.Y, literal.y, BS:VEC_120/SCL_212
4790; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.z,
4791; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4792; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
4793; EG-NEXT:     LSHR T56.X, PS, literal.x,
4794; EG-NEXT:     LSHR T3.Y, T45.W, literal.y,
4795; EG-NEXT:     BFE_INT T57.Z, T44.W, 0.0, literal.y, BS:VEC_120/SCL_212
4796; EG-NEXT:     LSHR T3.W, T43.Y, literal.y,
4797; EG-NEXT:     LSHR * T4.W, T43.W, literal.y,
4798; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4799; EG-NEXT:     BFE_INT T57.X, T44.Z, 0.0, literal.x,
4800; EG-NEXT:     LSHR T4.Y, T42.Y, literal.x,
4801; EG-NEXT:     BFE_INT T58.Z, T44.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4802; EG-NEXT:     LSHR T5.W, T42.W, literal.x,
4803; EG-NEXT:     LSHR * T6.W, T44.W, literal.x,
4804; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4805; EG-NEXT:     BFE_INT T58.X, T44.X, 0.0, literal.x,
4806; EG-NEXT:     LSHR T5.Y, T44.Y, literal.x,
4807; EG-NEXT:     BFE_INT T59.Z, T42.W, 0.0, literal.x,
4808; EG-NEXT:     BFE_INT T57.W, PS, 0.0, literal.x,
4809; EG-NEXT:     LSHR * T6.W, T44.Z, literal.x,
4810; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4811; EG-NEXT:     BFE_INT T59.X, T42.Z, 0.0, literal.x,
4812; EG-NEXT:     BFE_INT T57.Y, PS, 0.0, literal.x,
4813; EG-NEXT:     BFE_INT T44.Z, T42.Y, 0.0, literal.x,
4814; EG-NEXT:     BFE_INT T58.W, PV.Y, 0.0, literal.x,
4815; EG-NEXT:     LSHR * T6.W, T44.X, literal.x,
4816; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4817; EG-NEXT:     BFE_INT T44.X, T42.X, 0.0, literal.x,
4818; EG-NEXT:     BFE_INT T58.Y, PS, 0.0, literal.x,
4819; EG-NEXT:     BFE_INT T60.Z, T43.W, 0.0, literal.x,
4820; EG-NEXT:     BFE_INT T59.W, T5.W, 0.0, literal.x, BS:VEC_120/SCL_212
4821; EG-NEXT:     LSHR * T5.W, T42.Z, literal.x,
4822; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4823; EG-NEXT:     BFE_INT T60.X, T43.Z, 0.0, literal.x,
4824; EG-NEXT:     BFE_INT T59.Y, PS, 0.0, literal.x,
4825; EG-NEXT:     BFE_INT T42.Z, T43.Y, 0.0, literal.x,
4826; EG-NEXT:     BFE_INT T44.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4827; EG-NEXT:     LSHR * T5.W, T42.X, literal.x,
4828; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4829; EG-NEXT:     BFE_INT T42.X, T43.X, 0.0, literal.x,
4830; EG-NEXT:     BFE_INT T44.Y, PS, 0.0, literal.x,
4831; EG-NEXT:     BFE_INT T61.Z, T45.W, 0.0, literal.x,
4832; EG-NEXT:     BFE_INT * T60.W, T4.W, 0.0, literal.x, BS:VEC_120/SCL_212
4833; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4834; EG-NEXT:    ALU clause starting at 133:
4835; EG-NEXT:     LSHR * T4.W, T43.Z, literal.x,
4836; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4837; EG-NEXT:     BFE_INT T61.X, T45.Z, 0.0, literal.x,
4838; EG-NEXT:     BFE_INT T60.Y, PV.W, 0.0, literal.x,
4839; EG-NEXT:     BFE_INT T43.Z, T45.Y, 0.0, literal.x,
4840; EG-NEXT:     BFE_INT T42.W, T3.W, 0.0, literal.x,
4841; EG-NEXT:     LSHR * T3.W, T43.X, literal.x,
4842; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4843; EG-NEXT:     BFE_INT T43.X, T45.X, 0.0, literal.x,
4844; EG-NEXT:     BFE_INT T42.Y, PS, 0.0, literal.x,
4845; EG-NEXT:     BFE_INT T62.Z, T46.W, 0.0, literal.x,
4846; EG-NEXT:     BFE_INT T61.W, T3.Y, 0.0, literal.x,
4847; EG-NEXT:     LSHR * T3.W, T45.Z, literal.x,
4848; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4849; EG-NEXT:     BFE_INT T62.X, T46.Z, 0.0, literal.x,
4850; EG-NEXT:     BFE_INT T61.Y, PS, 0.0, literal.x,
4851; EG-NEXT:     BFE_INT T45.Z, T46.Y, 0.0, literal.x,
4852; EG-NEXT:     BFE_INT T43.W, T2.W, 0.0, literal.x,
4853; EG-NEXT:     LSHR * T2.W, T45.X, literal.x,
4854; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4855; EG-NEXT:     BFE_INT T45.X, T46.X, 0.0, literal.x,
4856; EG-NEXT:     BFE_INT T43.Y, PS, 0.0, literal.x,
4857; EG-NEXT:     BFE_INT T63.Z, T47.W, 0.0, literal.x,
4858; EG-NEXT:     BFE_INT T62.W, T2.Z, 0.0, literal.x,
4859; EG-NEXT:     LSHR * T2.W, T46.Z, literal.x,
4860; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4861; EG-NEXT:     BFE_INT T63.X, T47.Z, 0.0, literal.x,
4862; EG-NEXT:     BFE_INT T62.Y, PS, 0.0, literal.x,
4863; EG-NEXT:     BFE_INT T46.Z, T47.Y, 0.0, literal.x,
4864; EG-NEXT:     BFE_INT T45.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4865; EG-NEXT:     LSHR * T2.W, T46.X, literal.x,
4866; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4867; EG-NEXT:     BFE_INT T46.X, T47.X, 0.0, literal.x,
4868; EG-NEXT:     BFE_INT T45.Y, PS, 0.0, literal.x,
4869; EG-NEXT:     BFE_INT T64.Z, T48.W, 0.0, literal.x,
4870; EG-NEXT:     BFE_INT T63.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
4871; EG-NEXT:     LSHR * T1.W, T47.Z, literal.x,
4872; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4873; EG-NEXT:     BFE_INT T64.X, T48.Z, 0.0, literal.x,
4874; EG-NEXT:     BFE_INT T63.Y, PS, 0.0, literal.x,
4875; EG-NEXT:     BFE_INT T47.Z, T48.Y, 0.0, literal.x,
4876; EG-NEXT:     BFE_INT T46.W, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4877; EG-NEXT:     LSHR * T1.W, T47.X, literal.x,
4878; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4879; EG-NEXT:     BFE_INT T47.X, T48.X, 0.0, literal.x,
4880; EG-NEXT:     BFE_INT T46.Y, PS, 0.0, literal.x,
4881; EG-NEXT:     BFE_INT T65.Z, T41.W, 0.0, literal.x,
4882; EG-NEXT:     BFE_INT T64.W, T1.Y, 0.0, literal.x,
4883; EG-NEXT:     LSHR * T1.W, T48.Z, literal.x,
4884; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4885; EG-NEXT:     BFE_INT T65.X, T41.Z, 0.0, literal.x,
4886; EG-NEXT:     BFE_INT T64.Y, PS, 0.0, literal.x,
4887; EG-NEXT:     BFE_INT T48.Z, T41.Y, 0.0, literal.x,
4888; EG-NEXT:     BFE_INT T47.W, T0.W, 0.0, literal.x,
4889; EG-NEXT:     LSHR * T0.W, T48.X, literal.x,
4890; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4891; EG-NEXT:     BFE_INT T48.X, T41.X, 0.0, literal.x,
4892; EG-NEXT:     BFE_INT T47.Y, PS, 0.0, literal.x,
4893; EG-NEXT:     LSHR T1.Z, T41.Z, literal.x,
4894; EG-NEXT:     BFE_INT T65.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4895; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4896; EG-NEXT:    16(2.242078e-44), 240(3.363116e-43)
4897; EG-NEXT:     LSHR T66.X, PS, literal.x,
4898; EG-NEXT:     BFE_INT T65.Y, PV.Z, 0.0, literal.y,
4899; EG-NEXT:     LSHR T0.Z, T41.X, literal.y,
4900; EG-NEXT:     BFE_INT T48.W, T0.Y, 0.0, literal.y,
4901; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4902; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4903; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
4904; EG-NEXT:     LSHR T41.X, PS, literal.x,
4905; EG-NEXT:     BFE_INT * T48.Y, PV.Z, 0.0, literal.y,
4906; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4907;
4908; CM-LABEL: global_sextload_v64i16_to_v64i32:
4909; CM:       ; %bb.0:
4910; CM-NEXT:    ALU 0, @40, KC0[CB0:0-32], KC1[]
4911; CM-NEXT:    TEX 1 @24
4912; CM-NEXT:    ALU 15, @41, KC0[CB0:0-32], KC1[]
4913; CM-NEXT:    TEX 5 @28
4914; CM-NEXT:    ALU 82, @57, KC0[CB0:0-32], KC1[]
4915; CM-NEXT:    ALU 72, @140, KC0[CB0:0-32], KC1[]
4916; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T65, T66.X
4917; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T36, T37.X
4918; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T64, T56.X
4919; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T55.X
4920; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T63, T54.X
4921; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T53.X
4922; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T62, T52.X
4923; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T44, T51.X
4924; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T61, T50.X
4925; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T43, T49.X
4926; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T60, T48.X
4927; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T47.X
4928; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T59, T46.X
4929; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T41, T40.X
4930; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T58, T39.X
4931; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T57, T38.X
4932; CM-NEXT:    CF_END
4933; CM-NEXT:    PAD
4934; CM-NEXT:    Fetch clause starting at 24:
4935; CM-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 16, #1
4936; CM-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 0, #1
4937; CM-NEXT:    Fetch clause starting at 28:
4938; CM-NEXT:     VTX_READ_128 T41.XYZW, T35.X, 112, #1
4939; CM-NEXT:     VTX_READ_128 T42.XYZW, T35.X, 96, #1
4940; CM-NEXT:     VTX_READ_128 T43.XYZW, T35.X, 80, #1
4941; CM-NEXT:     VTX_READ_128 T44.XYZW, T35.X, 64, #1
4942; CM-NEXT:     VTX_READ_128 T45.XYZW, T35.X, 48, #1
4943; CM-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 32, #1
4944; CM-NEXT:    ALU clause starting at 40:
4945; CM-NEXT:     MOV * T35.X, KC0[2].Z,
4946; CM-NEXT:    ALU clause starting at 41:
4947; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4948; CM-NEXT:    224(3.138909e-43), 0(0.000000e+00)
4949; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
4950; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4951; CM-NEXT:    2(2.802597e-45), 240(3.363116e-43)
4952; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
4953; CM-NEXT:     LSHR T0.Y, T37.Z, literal.y,
4954; CM-NEXT:     LSHR T0.Z, T37.W, literal.y,
4955; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4956; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4957; CM-NEXT:    192(2.690493e-43), 0(0.000000e+00)
4958; CM-NEXT:     LSHR T40.X, PV.W, literal.x,
4959; CM-NEXT:     LSHR T1.Y, T37.Y, literal.y,
4960; CM-NEXT:     LSHR T1.Z, T36.Z, literal.y,
4961; CM-NEXT:     LSHR * T0.W, T36.W, literal.y,
4962; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4963; CM-NEXT:    ALU clause starting at 57:
4964; CM-NEXT:     LSHR T2.Z, T36.X, literal.x,
4965; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
4966; CM-NEXT:    16(2.242078e-44), 208(2.914701e-43)
4967; CM-NEXT:     LSHR T46.X, PV.W, literal.x,
4968; CM-NEXT:     LSHR T2.Y, T36.Y, literal.y,
4969; CM-NEXT:     LSHR T3.Z, T35.Z, literal.y,
4970; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4971; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4972; CM-NEXT:    160(2.242078e-43), 0(0.000000e+00)
4973; CM-NEXT:     LSHR T47.X, PV.W, literal.x,
4974; CM-NEXT:     LSHR T3.Y, T35.W, literal.y,
4975; CM-NEXT:     LSHR T4.Z, T35.X, literal.y,
4976; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4977; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4978; CM-NEXT:    176(2.466285e-43), 0(0.000000e+00)
4979; CM-NEXT:     LSHR T48.X, PV.W, literal.x,
4980; CM-NEXT:     LSHR T4.Y, T35.Y, literal.y,
4981; CM-NEXT:     LSHR T5.Z, T45.Z, literal.y,
4982; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4983; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4984; CM-NEXT:    128(1.793662e-43), 0(0.000000e+00)
4985; CM-NEXT:     LSHR T49.X, PV.W, literal.x,
4986; CM-NEXT:     LSHR T5.Y, T45.W, literal.y,
4987; CM-NEXT:     LSHR T6.Z, T45.X, literal.y,
4988; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4989; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4990; CM-NEXT:    144(2.017870e-43), 0(0.000000e+00)
4991; CM-NEXT:     LSHR T50.X, PV.W, literal.x,
4992; CM-NEXT:     LSHR T6.Y, T45.Y, literal.y,
4993; CM-NEXT:     LSHR T7.Z, T44.Z, literal.y,
4994; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
4995; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4996; CM-NEXT:    96(1.345247e-43), 0(0.000000e+00)
4997; CM-NEXT:     LSHR T51.X, PV.W, literal.x,
4998; CM-NEXT:     LSHR T7.Y, T44.W, literal.y,
4999; CM-NEXT:     LSHR T8.Z, T44.X, literal.y,
5000; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5001; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5002; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
5003; CM-NEXT:     LSHR T52.X, PV.W, literal.x,
5004; CM-NEXT:     LSHR T8.Y, T44.Y, literal.y,
5005; CM-NEXT:     LSHR T9.Z, T43.Z, literal.y,
5006; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5007; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5008; CM-NEXT:    64(8.968310e-44), 0(0.000000e+00)
5009; CM-NEXT:     LSHR T53.X, PV.W, literal.x,
5010; CM-NEXT:     LSHR T9.Y, T43.W, literal.y,
5011; CM-NEXT:     LSHR T10.Z, T43.X, literal.y,
5012; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5013; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5014; CM-NEXT:    80(1.121039e-43), 0(0.000000e+00)
5015; CM-NEXT:     LSHR T54.X, PV.W, literal.x,
5016; CM-NEXT:     LSHR T10.Y, T43.Y, literal.y,
5017; CM-NEXT:     LSHR T11.Z, T42.Z, literal.y,
5018; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5019; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5020; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
5021; CM-NEXT:     LSHR T55.X, PV.W, literal.x,
5022; CM-NEXT:     LSHR T11.Y, T42.W, literal.y,
5023; CM-NEXT:     LSHR T12.Z, T42.X, literal.y,
5024; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5025; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5026; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
5027; CM-NEXT:     LSHR T56.X, PV.W, literal.x,
5028; CM-NEXT:     LSHR T12.Y, T42.Y, literal.y,
5029; CM-NEXT:     BFE_INT T57.Z, T41.Y, 0.0, literal.y, BS:VEC_120/SCL_212
5030; CM-NEXT:     LSHR * T1.W, T41.Z, literal.y,
5031; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5032; CM-NEXT:     BFE_INT T57.X, T41.X, 0.0, literal.x,
5033; CM-NEXT:     LSHR T13.Y, T41.W, literal.x,
5034; CM-NEXT:     BFE_INT T58.Z, T41.W, 0.0, literal.x,
5035; CM-NEXT:     LSHR * T2.W, T41.Y, literal.x,
5036; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5037; CM-NEXT:     BFE_INT T58.X, T41.Z, 0.0, literal.x,
5038; CM-NEXT:     LSHR T14.Y, T41.X, literal.x,
5039; CM-NEXT:     BFE_INT T41.Z, T42.Y, 0.0, literal.x,
5040; CM-NEXT:     BFE_INT * T57.W, PV.W, 0.0, literal.x,
5041; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5042; CM-NEXT:     BFE_INT T41.X, T42.X, 0.0, literal.x,
5043; CM-NEXT:     BFE_INT T57.Y, PV.Y, 0.0, literal.x,
5044; CM-NEXT:     BFE_INT T59.Z, T42.W, 0.0, literal.x,
5045; CM-NEXT:     BFE_INT * T58.W, T13.Y, 0.0, literal.x,
5046; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5047; CM-NEXT:    ALU clause starting at 140:
5048; CM-NEXT:     BFE_INT T59.X, T42.Z, 0.0, literal.x,
5049; CM-NEXT:     BFE_INT T58.Y, T1.W, 0.0, literal.x,
5050; CM-NEXT:     BFE_INT T42.Z, T43.Y, 0.0, literal.x,
5051; CM-NEXT:     BFE_INT * T41.W, T12.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5052; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5053; CM-NEXT:     BFE_INT T42.X, T43.X, 0.0, literal.x,
5054; CM-NEXT:     BFE_INT T41.Y, T12.Z, 0.0, literal.x,
5055; CM-NEXT:     BFE_INT T60.Z, T43.W, 0.0, literal.x,
5056; CM-NEXT:     BFE_INT * T59.W, T11.Y, 0.0, literal.x,
5057; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5058; CM-NEXT:     BFE_INT T60.X, T43.Z, 0.0, literal.x,
5059; CM-NEXT:     BFE_INT T59.Y, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5060; CM-NEXT:     BFE_INT T43.Z, T44.Y, 0.0, literal.x,
5061; CM-NEXT:     BFE_INT * T42.W, T10.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5062; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5063; CM-NEXT:     BFE_INT T43.X, T44.X, 0.0, literal.x,
5064; CM-NEXT:     BFE_INT T42.Y, T10.Z, 0.0, literal.x,
5065; CM-NEXT:     BFE_INT T61.Z, T44.W, 0.0, literal.x,
5066; CM-NEXT:     BFE_INT * T60.W, T9.Y, 0.0, literal.x,
5067; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5068; CM-NEXT:     BFE_INT T61.X, T44.Z, 0.0, literal.x,
5069; CM-NEXT:     BFE_INT T60.Y, T9.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5070; CM-NEXT:     BFE_INT T44.Z, T45.Y, 0.0, literal.x,
5071; CM-NEXT:     BFE_INT * T43.W, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5072; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5073; CM-NEXT:     BFE_INT T44.X, T45.X, 0.0, literal.x,
5074; CM-NEXT:     BFE_INT T43.Y, T8.Z, 0.0, literal.x,
5075; CM-NEXT:     BFE_INT T62.Z, T45.W, 0.0, literal.x,
5076; CM-NEXT:     BFE_INT * T61.W, T7.Y, 0.0, literal.x,
5077; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5078; CM-NEXT:     BFE_INT T62.X, T45.Z, 0.0, literal.x,
5079; CM-NEXT:     BFE_INT T61.Y, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5080; CM-NEXT:     BFE_INT T45.Z, T35.Y, 0.0, literal.x,
5081; CM-NEXT:     BFE_INT * T44.W, T6.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5082; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5083; CM-NEXT:     BFE_INT T45.X, T35.X, 0.0, literal.x,
5084; CM-NEXT:     BFE_INT T44.Y, T6.Z, 0.0, literal.x,
5085; CM-NEXT:     BFE_INT T63.Z, T35.W, 0.0, literal.x,
5086; CM-NEXT:     BFE_INT * T62.W, T5.Y, 0.0, literal.x,
5087; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5088; CM-NEXT:     BFE_INT T63.X, T35.Z, 0.0, literal.x,
5089; CM-NEXT:     BFE_INT T62.Y, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5090; CM-NEXT:     BFE_INT T35.Z, T36.Y, 0.0, literal.x,
5091; CM-NEXT:     BFE_INT * T45.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5092; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5093; CM-NEXT:     BFE_INT T35.X, T36.X, 0.0, literal.x,
5094; CM-NEXT:     BFE_INT T45.Y, T4.Z, 0.0, literal.x,
5095; CM-NEXT:     BFE_INT T64.Z, T36.W, 0.0, literal.x,
5096; CM-NEXT:     BFE_INT * T63.W, T3.Y, 0.0, literal.x,
5097; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5098; CM-NEXT:     BFE_INT T64.X, T36.Z, 0.0, literal.x,
5099; CM-NEXT:     BFE_INT T63.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5100; CM-NEXT:     BFE_INT T36.Z, T37.Y, 0.0, literal.x,
5101; CM-NEXT:     BFE_INT * T35.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5102; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5103; CM-NEXT:     BFE_INT T36.X, T37.X, 0.0, literal.x,
5104; CM-NEXT:     BFE_INT T35.Y, T2.Z, 0.0, literal.x,
5105; CM-NEXT:     BFE_INT T65.Z, T37.W, 0.0, literal.x,
5106; CM-NEXT:     BFE_INT * T64.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
5107; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5108; CM-NEXT:     BFE_INT T65.X, T37.Z, 0.0, literal.x,
5109; CM-NEXT:     BFE_INT T64.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5110; CM-NEXT:     LSHR T1.Z, T37.X, literal.x,
5111; CM-NEXT:     BFE_INT * T36.W, T1.Y, 0.0, literal.x,
5112; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5113; CM-NEXT:     LSHR T37.X, KC0[2].Y, literal.x,
5114; CM-NEXT:     BFE_INT T36.Y, PV.Z, 0.0, literal.y,
5115; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.y,
5116; CM-NEXT:     BFE_INT * T65.W, T0.Z, 0.0, literal.y,
5117; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5118; CM-NEXT:     LSHR T66.X, PV.Z, literal.x,
5119; CM-NEXT:     BFE_INT * T65.Y, T0.Y, 0.0, literal.y,
5120; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5121  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
5122  %ext = sext <64 x i16> %load to <64 x i32>
5123  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
5124  ret void
5125}
5126
; Test: load one i16 from global memory (addrspace 1), zero-extend it to i64
; and store the 64-bit result to %out.
; What the autogenerated expectations below pin down:
;  - SI and HSA (kaveri) runs: an unsigned 16-bit load supplies the low dword
;    and the high dword is a constant 0 moved into the second register.
;  - VI (tonga) run: same shape, plus an explicit mask of the loaded value
;    with 0xffff before the store.
;  - r600 runs (redwood, cayman): a 16-bit vertex fetch into the X channel,
;    with the Y channel set to 0.0 to form the upper half.
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
5127define amdgpu_kernel void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
5128; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i64:
5129; GCN-NOHSA-SI:       ; %bb.0:
5130; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5131; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5132; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5133; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5134; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5135; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5136; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5137; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5138; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5139; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5140; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5141; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5142; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5143; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5144; GCN-NOHSA-SI-NEXT:    s_endpgm
5145;
5146; GCN-HSA-LABEL: global_zextload_i16_to_i64:
5147; GCN-HSA:       ; %bb.0:
5148; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5149; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5150; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5151; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
5152; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
5153; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
5154; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
5155; GCN-HSA-NEXT:    v_mov_b32_e32 v3, 0
5156; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5157; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5158; GCN-HSA-NEXT:    s_endpgm
5159;
5160; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i64:
5161; GCN-NOHSA-VI:       ; %bb.0:
5162; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5163; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5164; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5165; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
5166; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
5167; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5168; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
5169; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
5170; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5171; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5172; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5173; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5174; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5175; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5176; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5177; GCN-NOHSA-VI-NEXT:    s_endpgm
5178;
5179; EG-LABEL: global_zextload_i16_to_i64:
5180; EG:       ; %bb.0:
5181; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5182; EG-NEXT:    TEX 0 @6
5183; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5184; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5185; EG-NEXT:    CF_END
5186; EG-NEXT:    PAD
5187; EG-NEXT:    Fetch clause starting at 6:
5188; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5189; EG-NEXT:    ALU clause starting at 8:
5190; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5191; EG-NEXT:    ALU clause starting at 9:
5192; EG-NEXT:     MOV * T0.Y, 0.0,
5193; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5194; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5195;
5196; CM-LABEL: global_zextload_i16_to_i64:
5197; CM:       ; %bb.0:
5198; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5199; CM-NEXT:    TEX 0 @6
5200; CM-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5201; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5202; CM-NEXT:    CF_END
5203; CM-NEXT:    PAD
5204; CM-NEXT:    Fetch clause starting at 6:
5205; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5206; CM-NEXT:    ALU clause starting at 8:
5207; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5208; CM-NEXT:    ALU clause starting at 9:
5209; CM-NEXT:     MOV * T0.Y, 0.0,
5210; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5211; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5212  %a = load i16, i16 addrspace(1)* %in
5213  %ext = zext i16 %a to i64
5214  store i64 %ext, i64 addrspace(1)* %out
5215  ret void
5216}
5217
5218; FIXME: Need to optimize this sequence to avoid extra bfe:
5219;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
5220;          t31: i64 = any_extend t28
5221;        t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
5222
5223; TODO: These could be expanded earlier using ASHR 15
; Test: load one i16 from global memory (addrspace 1), sign-extend it to i64
; and store the 64-bit result to %out.
; What the autogenerated expectations below pin down:
;  - SI and HSA (kaveri) runs: a signed 16-bit load produces the low dword,
;    and the high dword is that value arithmetically shifted right by 31.
;  - VI (tonga) run: an unsigned 16-bit load followed by a 16-bit signed
;    bitfield extract, then the same shift by 31.
;    NOTE(review): this looks like the "extra bfe" the FIXME preceding this
;    function refers to - confirm before relying on it.
;  - r600 runs (redwood, cayman): a 16-bit vertex fetch, BFE_INT with width 16
;    for the low half and ASHR by 31 for the high half.
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
5224define amdgpu_kernel void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
5225; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i64:
5226; GCN-NOHSA-SI:       ; %bb.0:
5227; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5228; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5229; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5230; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5231; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5232; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5233; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5234; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5235; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
5236; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5237; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5238; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5239; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5240; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5241; GCN-NOHSA-SI-NEXT:    s_endpgm
5242;
5243; GCN-HSA-LABEL: global_sextload_i16_to_i64:
5244; GCN-HSA:       ; %bb.0:
5245; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5246; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5247; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5248; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
5249; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
5250; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
5251; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
5252; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5253; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5254; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5255; GCN-HSA-NEXT:    s_endpgm
5256;
5257; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i64:
5258; GCN-NOHSA-VI:       ; %bb.0:
5259; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5260; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5261; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5262; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5263; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5264; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5265; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
5266; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
5267; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
5268; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
5269; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
5270; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5271; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
5272; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5273; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5274; GCN-NOHSA-VI-NEXT:    s_endpgm
5275;
5276; EG-LABEL: global_sextload_i16_to_i64:
5277; EG:       ; %bb.0:
5278; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5279; EG-NEXT:    TEX 0 @6
5280; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5281; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5282; EG-NEXT:    CF_END
5283; EG-NEXT:    PAD
5284; EG-NEXT:    Fetch clause starting at 6:
5285; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5286; EG-NEXT:    ALU clause starting at 8:
5287; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5288; EG-NEXT:    ALU clause starting at 9:
5289; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5290; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5291; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5292; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5293; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5294;
5295; CM-LABEL: global_sextload_i16_to_i64:
5296; CM:       ; %bb.0:
5297; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5298; CM-NEXT:    TEX 0 @6
5299; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5300; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5301; CM-NEXT:    CF_END
5302; CM-NEXT:    PAD
5303; CM-NEXT:    Fetch clause starting at 6:
5304; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5305; CM-NEXT:    ALU clause starting at 8:
5306; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5307; CM-NEXT:    ALU clause starting at 9:
5308; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
5309; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5310; CM-NEXT:     LSHR T1.X, KC0[2].Y, literal.x,
5311; CM-NEXT:     ASHR * T0.Y, PV.X, literal.y,
5312; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
5313  %a = load i16, i16 addrspace(1)* %in
5314  %ext = sext i16 %a to i64
5315  store i64 %ext, i64 addrspace(1)* %out
5316  ret void
5317}
5318
; Test: load a <1 x i16> vector from global memory (addrspace 1), zero-extend
; it to <1 x i64> and store the result to %out.
; The expected instruction sequences below are the same as those recorded for
; the scalar i16 -> i64 zero-extending load earlier in this file: an unsigned
; 16-bit load for the low dword and a constant 0 for the high dword, with the
; VI (tonga) run additionally masking the loaded value with 0xffff.
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
5319define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
5320; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i64:
5321; GCN-NOHSA-SI:       ; %bb.0:
5322; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5323; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5324; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5325; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5326; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5327; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5328; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5329; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5330; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5331; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5332; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5333; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5334; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5335; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5336; GCN-NOHSA-SI-NEXT:    s_endpgm
5337;
5338; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i64:
5339; GCN-HSA:       ; %bb.0:
5340; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5341; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5342; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5343; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
5344; GCN-HSA-NEXT:    flat_load_ushort v2, v[2:3]
5345; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
5346; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
5347; GCN-HSA-NEXT:    v_mov_b32_e32 v3, 0
5348; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5349; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5350; GCN-HSA-NEXT:    s_endpgm
5351;
5352; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i64:
5353; GCN-NOHSA-VI:       ; %bb.0:
5354; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5355; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5356; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5357; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
5358; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
5359; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5360; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
5361; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
5362; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5363; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5364; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5365; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5366; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5367; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5368; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5369; GCN-NOHSA-VI-NEXT:    s_endpgm
5370;
5371; EG-LABEL: global_zextload_v1i16_to_v1i64:
5372; EG:       ; %bb.0:
5373; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5374; EG-NEXT:    TEX 0 @6
5375; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5376; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5377; EG-NEXT:    CF_END
5378; EG-NEXT:    PAD
5379; EG-NEXT:    Fetch clause starting at 6:
5380; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5381; EG-NEXT:    ALU clause starting at 8:
5382; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5383; EG-NEXT:    ALU clause starting at 9:
5384; EG-NEXT:     MOV * T0.Y, 0.0,
5385; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5386; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5387;
5388; CM-LABEL: global_zextload_v1i16_to_v1i64:
5389; CM:       ; %bb.0:
5390; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5391; CM-NEXT:    TEX 0 @6
5392; CM-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5393; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5394; CM-NEXT:    CF_END
5395; CM-NEXT:    PAD
5396; CM-NEXT:    Fetch clause starting at 6:
5397; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5398; CM-NEXT:    ALU clause starting at 8:
5399; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5400; CM-NEXT:    ALU clause starting at 9:
5401; CM-NEXT:     MOV * T0.Y, 0.0,
5402; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5403; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5404  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
5405  %ext = zext <1 x i16> %load to <1 x i64>
5406  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
5407  ret void
5408}
5409
5410; TODO: These could be expanded earlier using ASHR 15
; Test: load a <1 x i16> vector from global memory (addrspace 1), sign-extend
; it to <1 x i64> and store the result to %out.
; The expected instruction sequences below are the same as those recorded for
; the scalar i16 -> i64 sign-extending load earlier in this file:
;  - SI and HSA (kaveri) runs: signed 16-bit load, then an arithmetic shift
;    right by 31 to build the high dword.
;  - VI (tonga) run: unsigned 16-bit load, a 16-bit signed bitfield extract,
;    then the shift by 31.
;  - r600 runs (redwood, cayman): BFE_INT with width 16 for the low half and
;    ASHR by 31 for the high half.
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
5411define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
5412; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i64:
5413; GCN-NOHSA-SI:       ; %bb.0:
5414; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5415; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5416; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5417; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5418; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5419; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5420; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5421; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5422; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
5423; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5424; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5425; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5426; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5427; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5428; GCN-NOHSA-SI-NEXT:    s_endpgm
5429;
5430; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i64:
5431; GCN-HSA:       ; %bb.0:
5432; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5433; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5434; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
5435; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
5436; GCN-HSA-NEXT:    flat_load_sshort v2, v[2:3]
5437; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
5438; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
5439; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5440; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5441; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5442; GCN-HSA-NEXT:    s_endpgm
5443;
5444; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i64:
5445; GCN-NOHSA-VI:       ; %bb.0:
5446; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5447; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5448; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5449; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5450; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5451; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5452; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
5453; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
5454; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
5455; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
5456; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
5457; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5458; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
5459; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5460; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5461; GCN-NOHSA-VI-NEXT:    s_endpgm
5462;
5463; EG-LABEL: global_sextload_v1i16_to_v1i64:
5464; EG:       ; %bb.0:
5465; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5466; EG-NEXT:    TEX 0 @6
5467; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5468; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5469; EG-NEXT:    CF_END
5470; EG-NEXT:    PAD
5471; EG-NEXT:    Fetch clause starting at 6:
5472; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5473; EG-NEXT:    ALU clause starting at 8:
5474; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5475; EG-NEXT:    ALU clause starting at 9:
5476; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5477; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5478; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5479; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5480; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5481;
5482; CM-LABEL: global_sextload_v1i16_to_v1i64:
5483; CM:       ; %bb.0:
5484; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5485; CM-NEXT:    TEX 0 @6
5486; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5487; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
5488; CM-NEXT:    CF_END
5489; CM-NEXT:    PAD
5490; CM-NEXT:    Fetch clause starting at 6:
5491; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5492; CM-NEXT:    ALU clause starting at 8:
5493; CM-NEXT:     MOV * T0.X, KC0[2].Z,
5494; CM-NEXT:    ALU clause starting at 9:
5495; CM-NEXT:     BFE_INT * T0.X, T0.X, 0.0, literal.x,
5496; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5497; CM-NEXT:     LSHR T1.X, KC0[2].Y, literal.x,
5498; CM-NEXT:     ASHR * T0.Y, PV.X, literal.y,
5499; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
5500  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
5501  %ext = sext <1 x i16> %load to <1 x i64>
5502  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
5503  ret void
5504}
5505
; Test: load a <2 x i16> vector from global memory (addrspace 1), zero-extend
; each element to i64 and store the <2 x i64> result to %out.
; What the autogenerated expectations below pin down:
;  - SI, HSA (kaveri) and VI (tonga) runs: both elements arrive in one dword
;    load; element 0 is the loaded value masked with 0xffff, element 1 is the
;    loaded value shifted right by 16, and the two high dwords are zero
;    (one register set to 0 and copied into the other). The result is written
;    with a single four-dword store.
;  - r600 runs (redwood, cayman): a 32-bit vertex fetch, LSHR by 16 for the
;    Z channel, AND_INT with 65535 for the X channel, and 0.0 moved into the
;    Y and W channels.
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
5506define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
5507; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i64:
5508; GCN-NOHSA-SI:       ; %bb.0:
5509; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5510; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5511; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5512; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5513; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5514; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5515; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5516; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5517; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
5518; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5519; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5520; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5521; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5522; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5523; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5524; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5525; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5526; GCN-NOHSA-SI-NEXT:    s_endpgm
5527;
5528; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i64:
5529; GCN-HSA:       ; %bb.0:
5530; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5531; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5532; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5533; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5534; GCN-HSA-NEXT:    flat_load_dword v0, v[0:1]
5535; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5536; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5537; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5538; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5539; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5540; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5541; GCN-HSA-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5542; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5543; GCN-HSA-NEXT:    s_endpgm
5544;
5545; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i64:
5546; GCN-NOHSA-VI:       ; %bb.0:
5547; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5548; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5549; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5550; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
5551; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
5552; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5553; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
5554; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
5555; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[8:11], 0
5556; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5557; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5558; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5559; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5560; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5561; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v2
5562; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
5563; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5564; GCN-NOHSA-VI-NEXT:    s_endpgm
5565;
5566; EG-LABEL: global_zextload_v2i16_to_v2i64:
5567; EG:       ; %bb.0:
5568; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5569; EG-NEXT:    TEX 0 @6
5570; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
5571; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5572; EG-NEXT:    CF_END
5573; EG-NEXT:    PAD
5574; EG-NEXT:    Fetch clause starting at 6:
5575; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5576; EG-NEXT:    ALU clause starting at 8:
5577; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5578; EG-NEXT:    ALU clause starting at 9:
5579; EG-NEXT:     LSHR * T4.Z, T4.X, literal.x,
5580; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5581; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
5582; EG-NEXT:     MOV T4.Y, 0.0,
5583; EG-NEXT:     MOV T4.W, 0.0,
5584; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
5585; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
5586;
5587; CM-LABEL: global_zextload_v2i16_to_v2i64:
5588; CM:       ; %bb.0:
5589; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5590; CM-NEXT:    TEX 0 @6
5591; CM-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
5592; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
5593; CM-NEXT:    CF_END
5594; CM-NEXT:    PAD
5595; CM-NEXT:    Fetch clause starting at 6:
5596; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5597; CM-NEXT:    ALU clause starting at 8:
5598; CM-NEXT:     MOV * T4.X, KC0[2].Z,
5599; CM-NEXT:    ALU clause starting at 9:
5600; CM-NEXT:     LSHR * T4.Z, T4.X, literal.x,
5601; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5602; CM-NEXT:     AND_INT T4.X, T4.X, literal.x,
5603; CM-NEXT:     MOV T4.Y, 0.0,
5604; CM-NEXT:     MOV * T4.W, 0.0,
5605; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
5606; CM-NEXT:     LSHR * T5.X, KC0[2].Y, literal.x,
5607; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5608  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
5609  %ext = zext <2 x i16> %load to <2 x i64>
5610  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
5611  ret void
5612}
5613
; Test: load a <2 x i16> vector from global memory (addrspace 1), sign-extend
; each element to i64 and store the <2 x i64> result to %out.
; What the autogenerated expectations below pin down:
;  - SI, HSA (kaveri) and VI (tonga) runs: both elements arrive in one dword
;    load. Element 1 is first moved down with a logical shift right by 16;
;    each element is then sign-extended with a 16-bit signed bitfield extract,
;    and each high dword is the extended value shifted arithmetically right
;    by 31. The three runs differ only in register choice and in the order
;    of these instructions.
;  - r600 runs (redwood, cayman): the high element is produced directly with
;    ASHR by 16 (Z channel) and its upper half with ASHR by 31 of the fetched
;    dword (W channel); the low element uses BFE_INT with width 16 (X channel)
;    followed by ASHR by 31 (Y channel).
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
5614define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
5615; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i64:
5616; GCN-NOHSA-SI:       ; %bb.0:
5617; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5618; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5619; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5620; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5621; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5622; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5623; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5624; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5625; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
5626; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5627; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5628; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5629; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5630; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
5631; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5632; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v2, 0, 16
5633; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5634; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5635; GCN-NOHSA-SI-NEXT:    s_endpgm
5636;
5637; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i64:
5638; GCN-HSA:       ; %bb.0:
5639; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5640; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5641; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5642; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5643; GCN-HSA-NEXT:    flat_load_dword v0, v[0:1]
5644; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5645; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5646; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5647; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
5648; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
5649; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
5650; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5651; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5652; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5653; GCN-HSA-NEXT:    s_endpgm
5654;
5655; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i64:
5656; GCN-NOHSA-VI:       ; %bb.0:
5657; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5658; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5659; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5660; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
5661; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
5662; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5663; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
5664; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
5665; GCN-NOHSA-VI-NEXT:    buffer_load_dword v1, off, s[8:11], 0
5666; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5667; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5668; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5669; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
5670; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v1, 0, 16
5671; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v2, 0, 16
5672; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5673; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5674; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5675; GCN-NOHSA-VI-NEXT:    s_endpgm
5676;
5677; EG-LABEL: global_sextload_v2i16_to_v2i64:
5678; EG:       ; %bb.0:
5679; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5680; EG-NEXT:    TEX 0 @6
5681; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
5682; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5683; EG-NEXT:    CF_END
5684; EG-NEXT:    PAD
5685; EG-NEXT:    Fetch clause starting at 6:
5686; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5687; EG-NEXT:    ALU clause starting at 8:
5688; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5689; EG-NEXT:    ALU clause starting at 9:
5690; EG-NEXT:     ASHR * T4.W, T4.X, literal.x,
5691; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5692; EG-NEXT:     ASHR * T4.Z, T4.X, literal.x,
5693; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5694; EG-NEXT:     BFE_INT T4.X, T4.X, 0.0, literal.x,
5695; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
5696; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5697; EG-NEXT:     ASHR * T4.Y, PV.X, literal.x,
5698; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5699;
5700; CM-LABEL: global_sextload_v2i16_to_v2i64:
5701; CM:       ; %bb.0:
5702; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5703; CM-NEXT:    TEX 0 @6
5704; CM-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
5705; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
5706; CM-NEXT:    CF_END
5707; CM-NEXT:    PAD
5708; CM-NEXT:    Fetch clause starting at 6:
5709; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5710; CM-NEXT:    ALU clause starting at 8:
5711; CM-NEXT:     MOV * T4.X, KC0[2].Z,
5712; CM-NEXT:    ALU clause starting at 9:
5713; CM-NEXT:     ASHR * T4.W, T4.X, literal.x,
5714; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5715; CM-NEXT:     ASHR * T4.Z, T4.X, literal.x,
5716; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5717; CM-NEXT:     BFE_INT * T4.X, T4.X, 0.0, literal.x,
5718; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5719; CM-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
5720; CM-NEXT:     ASHR * T4.Y, PV.X, literal.y,
5721; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
5722  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
5723  %ext = sext <2 x i16> %load to <2 x i64>
5724  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
5725  ret void
5726}
5727
5728define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
5729; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i64:
5730; GCN-NOHSA-SI:       ; %bb.0:
5731; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5732; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5733; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5734; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5735; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5736; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5737; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5738; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5739; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[8:9], off, s[8:11], 0
5740; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5741; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, 0xffff
5742; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5743; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v1
5744; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v1
5745; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5746; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5747; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5748; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v9
5749; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v8
5750; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s2, v8
5751; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s2, v9
5752; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
5753; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
5754; GCN-NOHSA-SI-NEXT:    s_endpgm
5755;
5756; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i64:
5757; GCN-HSA:       ; %bb.0:
5758; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5759; GCN-HSA-NEXT:    s_mov_b32 s4, 0xffff
5760; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5761; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5762; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5763; GCN-HSA-NEXT:    flat_load_dwordx2 v[8:9], v[0:1]
5764; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5765; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5766; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5767; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
5768; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
5769; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5770; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
5771; GCN-HSA-NEXT:    v_mov_b32_e32 v5, v1
5772; GCN-HSA-NEXT:    v_mov_b32_e32 v7, v1
5773; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
5774; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5775; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v9
5776; GCN-HSA-NEXT:    v_and_b32_e32 v0, s4, v9
5777; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v6, 16, v8
5778; GCN-HSA-NEXT:    v_and_b32_e32 v4, s4, v8
5779; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
5780; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
5781; GCN-HSA-NEXT:    s_endpgm
5782;
5783; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i64:
5784; GCN-NOHSA-VI:       ; %bb.0:
5785; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5786; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5787; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5788; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
5789; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
5790; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5791; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
5792; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
5793; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[8:9], off, s[8:11], 0
5794; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, 0xffff
5795; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, 0
5796; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, 0
5797; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5798; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5799; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, 0
5800; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v5
5801; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5802; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, s6, v9
5803; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v9
5804; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, s6, v8
5805; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v8
5806; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
5807; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5808; GCN-NOHSA-VI-NEXT:    s_endpgm
5809;
5810; EG-LABEL: global_zextload_v4i16_to_v4i64:
5811; EG:       ; %bb.0:
5812; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5813; EG-NEXT:    TEX 0 @6
5814; EG-NEXT:    ALU 18, @9, KC0[CB0:0-32], KC1[]
5815; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
5816; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
5817; EG-NEXT:    CF_END
5818; EG-NEXT:    Fetch clause starting at 6:
5819; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5820; EG-NEXT:    ALU clause starting at 8:
5821; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5822; EG-NEXT:    ALU clause starting at 9:
5823; EG-NEXT:     MOV T2.X, T5.X,
5824; EG-NEXT:     MOV * T3.X, T5.Y,
5825; EG-NEXT:     MOV T0.Y, PV.X,
5826; EG-NEXT:     MOV * T0.Z, PS,
5827; EG-NEXT:     LSHR * T5.Z, PV.Z, literal.x,
5828; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5829; EG-NEXT:     AND_INT T5.X, T0.Z, literal.x,
5830; EG-NEXT:     MOV T5.Y, 0.0,
5831; EG-NEXT:     LSHR T6.Z, T0.Y, literal.y,
5832; EG-NEXT:     AND_INT * T6.X, T0.Y, literal.x,
5833; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5834; EG-NEXT:     MOV T6.Y, 0.0,
5835; EG-NEXT:     MOV T5.W, 0.0,
5836; EG-NEXT:     MOV * T6.W, 0.0,
5837; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
5838; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5839; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5840; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
5841; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5842;
5843; CM-LABEL: global_zextload_v4i16_to_v4i64:
5844; CM:       ; %bb.0:
5845; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5846; CM-NEXT:    TEX 0 @6
5847; CM-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
5848; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T8.X
5849; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6, T7.X
5850; CM-NEXT:    CF_END
5851; CM-NEXT:    Fetch clause starting at 6:
5852; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5853; CM-NEXT:    ALU clause starting at 8:
5854; CM-NEXT:     MOV * T5.X, KC0[2].Z,
5855; CM-NEXT:    ALU clause starting at 9:
5856; CM-NEXT:     MOV * T2.X, T5.X,
5857; CM-NEXT:     MOV * T3.X, T5.Y,
5858; CM-NEXT:     MOV T0.Y, PV.X,
5859; CM-NEXT:     MOV * T0.Z, T2.X,
5860; CM-NEXT:     LSHR * T5.Z, PV.Z, literal.x,
5861; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5862; CM-NEXT:     AND_INT T5.X, T0.Z, literal.x,
5863; CM-NEXT:     MOV T5.Y, 0.0,
5864; CM-NEXT:     LSHR * T6.Z, T0.Y, literal.y,
5865; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5866; CM-NEXT:     AND_INT T6.X, T0.Y, literal.x,
5867; CM-NEXT:     MOV T6.Y, 0.0,
5868; CM-NEXT:     MOV * T5.W, 0.0,
5869; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
5870; CM-NEXT:     MOV * T6.W, 0.0,
5871; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5872; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5873; CM-NEXT:     LSHR * T7.X, PV.W, literal.x,
5874; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5875; CM-NEXT:     LSHR * T8.X, KC0[2].Y, literal.x,
5876; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5877  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
5878  %ext = zext <4 x i16> %load to <4 x i64>
5879  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
5880  ret void
5881}
5882
; Codegen test: load a <4 x i16> vector through the addrspace(1) pointer %in,
; sign-extend every lane to i64, and store the resulting <4 x i64> through
; the addrspace(1) pointer %out.
; The assertion lines inside this function are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5883define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
5884; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i64:
5885; GCN-NOHSA-SI:       ; %bb.0:
5886; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
5887; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5888; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5889; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5890; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5891; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5892; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5893; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5894; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[1:2], off, s[8:11], 0
5895; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5896; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5897; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5898; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v2
5899; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
5900; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v1, 0, 16
5901; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[6:7], v[1:2], 48
5902; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v3, 0, 16
5903; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5904; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v5, 0, 16
5905; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5906; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5907; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5908; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5909; GCN-NOHSA-SI-NEXT:    s_endpgm
5910;
5911; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i64:
5912; GCN-HSA:       ; %bb.0:
5913; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
5914; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5915; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5916; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5917; GCN-HSA-NEXT:    flat_load_dwordx2 v[1:2], v[0:1]
5918; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5919; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5920; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s3
5921; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
5922; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s2
5923; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
5924; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5925; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v2
5926; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
5927; GCN-HSA-NEXT:    v_ashr_i64 v[6:7], v[1:2], 48
5928; GCN-HSA-NEXT:    v_bfe_i32 v2, v4, 0, 16
5929; GCN-HSA-NEXT:    v_bfe_i32 v4, v3, 0, 16
5930; GCN-HSA-NEXT:    v_bfe_i32 v0, v1, 0, 16
5931; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5932; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5933; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5934; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
5935; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
5936; GCN-HSA-NEXT:    s_endpgm
5937;
5938; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i64:
5939; GCN-NOHSA-VI:       ; %bb.0:
5940; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
5941; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
5942; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
5943; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
5944; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
5945; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5946; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
5947; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
5948; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[1:2], off, s[8:11], 0
5949; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
5950; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
5951; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5952; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, v2
5953; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
5954; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
5955; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v5, 0, 16
5956; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v4, 0, 16
5957; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v1, 0, 16
5958; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v3, 0, 16
5959; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5960; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
5961; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5962; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5963; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
5964; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5965; GCN-NOHSA-VI-NEXT:    s_endpgm
5966;
5967; EG-LABEL: global_sextload_v4i16_to_v4i64:
5968; EG:       ; %bb.0:
5969; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5970; EG-NEXT:    TEX 0 @6
5971; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
5972; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
5973; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
5974; EG-NEXT:    CF_END
5975; EG-NEXT:    Fetch clause starting at 6:
5976; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5977; EG-NEXT:    ALU clause starting at 8:
5978; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5979; EG-NEXT:    ALU clause starting at 9:
5980; EG-NEXT:     MOV T2.X, T5.X,
5981; EG-NEXT:     MOV * T3.X, T5.Y,
5982; EG-NEXT:     MOV T0.Y, PS,
5983; EG-NEXT:     MOV * T0.Z, PV.X,
5984; EG-NEXT:     ASHR * T5.W, PV.Z, literal.x,
5985; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5986; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
5987; EG-NEXT:     ASHR T5.Z, T0.Z, literal.y,
5988; EG-NEXT:     ASHR * T7.W, T0.Y, literal.z,
5989; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5990; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5991; EG-NEXT:     BFE_INT T5.X, T0.Z, 0.0, literal.x,
5992; EG-NEXT:     ASHR * T7.Z, T0.Y, literal.x,
5993; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5994; EG-NEXT:     BFE_INT T7.X, T0.Y, 0.0, literal.x,
5995; EG-NEXT:     ASHR T5.Y, PV.X, literal.y,
5996; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5997; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
5998; EG-NEXT:     LSHR T8.X, PV.W, literal.x,
5999; EG-NEXT:     ASHR * T7.Y, PV.X, literal.y,
6000; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6001;
6002; CM-LABEL: global_sextload_v4i16_to_v4i64:
6003; CM:       ; %bb.0:
6004; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
6005; CM-NEXT:    TEX 0 @6
6006; CM-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
6007; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T8.X
6008; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
6009; CM-NEXT:    CF_END
6010; CM-NEXT:    Fetch clause starting at 6:
6011; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
6012; CM-NEXT:    ALU clause starting at 8:
6013; CM-NEXT:     MOV * T5.X, KC0[2].Z,
6014; CM-NEXT:    ALU clause starting at 9:
6015; CM-NEXT:     MOV * T2.X, T5.X,
6016; CM-NEXT:     MOV T3.X, T5.Y,
6017; CM-NEXT:     MOV * T0.Y, PV.X,
6018; CM-NEXT:     MOV * T0.Z, PV.X,
6019; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.x,
6020; CM-NEXT:     ASHR * T5.W, PV.Z, literal.y,
6021; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6022; CM-NEXT:     LSHR T6.X, PV.Z, literal.x,
6023; CM-NEXT:     ASHR T5.Z, T0.Z, literal.y,
6024; CM-NEXT:     ASHR * T7.W, T0.Y, literal.z,
6025; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6026; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6027; CM-NEXT:     BFE_INT T5.X, T0.Z, 0.0, literal.x,
6028; CM-NEXT:     ASHR * T7.Z, T0.Y, literal.x,
6029; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6030; CM-NEXT:     BFE_INT T7.X, T0.Y, 0.0, literal.x,
6031; CM-NEXT:     ASHR * T5.Y, PV.X, literal.y,
6032; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6033; CM-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
6034; CM-NEXT:     ASHR * T7.Y, PV.X, literal.y,
6035; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
; IR under test: one vector load, one sext, one vector store.
6036  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
6037  %ext = sext <4 x i16> %load to <4 x i64>
6038  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
6039  ret void
6040}
6041
; Codegen test: load an <8 x i16> vector through the addrspace(1) pointer %in,
; zero-extend every lane to i64, and store the resulting <8 x i64> through
; the addrspace(1) pointer %out.
; The assertion lines inside this function are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
6042define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
6043; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i64:
6044; GCN-NOHSA-SI:       ; %bb.0:
6045; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
6046; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
6047; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
6048; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, 0xffff
6049; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
6050; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
6051; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6052; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
6053; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
6054; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6055; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, 0
6056; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, 0
6057; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, 0
6058; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v9
6059; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v9
6060; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v9
6061; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v9
6062; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v9
6063; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
6064; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
6065; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
6066; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
6067; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v3
6068; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
6069; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
6070; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s12, v0
6071; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, s12, v2
6072; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, s12, v1
6073; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, s12, v3
6074; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48
6075; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:16
6076; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:32
6077; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
6078; GCN-NOHSA-SI-NEXT:    s_endpgm
6079;
6080; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i64:
6081; GCN-HSA:       ; %bb.0:
6082; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6083; GCN-HSA-NEXT:    v_mov_b32_e32 v12, 0
6084; GCN-HSA-NEXT:    s_mov_b32 s4, 0xffff
6085; GCN-HSA-NEXT:    v_mov_b32_e32 v14, v12
6086; GCN-HSA-NEXT:    v_mov_b32_e32 v15, v12
6087; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6088; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6089; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6090; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6091; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6092; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6093; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6094; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6095; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6096; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6097; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
6098; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
6099; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
6100; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
6101; GCN-HSA-NEXT:    v_mov_b32_e32 v8, v12
6102; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
6103; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
6104; GCN-HSA-NEXT:    v_mov_b32_e32 v6, 0
6105; GCN-HSA-NEXT:    v_mov_b32_e32 v10, 0
6106; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
6107; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v3
6108; GCN-HSA-NEXT:    v_and_b32_e32 v11, s4, v3
6109; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[11:14]
6110; GCN-HSA-NEXT:    v_mov_b32_e32 v4, v12
6111; GCN-HSA-NEXT:    v_mov_b32_e32 v13, v12
6112; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
6113; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
6114; GCN-HSA-NEXT:    v_and_b32_e32 v3, s4, v0
6115; GCN-HSA-NEXT:    v_and_b32_e32 v12, s4, v1
6116; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
6117; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
6118; GCN-HSA-NEXT:    v_and_b32_e32 v7, s4, v2
6119; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
6120; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
6121; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
6122; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[3:6]
6123; GCN-HSA-NEXT:    s_endpgm
6124;
6125; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i64:
6126; GCN-NOHSA-VI:       ; %bb.0:
6127; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
6128; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
6129; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
6130; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
6131; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
6132; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6133; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
6134; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
6135; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6136; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, 0xffff
6137; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v17, 0
6138; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v19, 0
6139; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
6140; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
6141; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, 0
6142; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, 0
6143; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v15, 0
6144; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, v17
6145; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, v17
6146; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, v17
6147; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
6148; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, s6, v3
6149; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
6150; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, s6, v0
6151; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
6152; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, s6, v1
6153; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
6154; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, s6, v2
6155; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
6156; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
6157; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
6158; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
6159; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6160; GCN-NOHSA-VI-NEXT:    s_endpgm
6161;
6162; EG-LABEL: global_zextload_v8i16_to_v8i64:
6163; EG:       ; %bb.0:
6164; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6165; EG-NEXT:    TEX 0 @8
6166; EG-NEXT:    ALU 30, @11, KC0[CB0:0-32], KC1[]
6167; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
6168; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
6169; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
6170; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
6171; EG-NEXT:    CF_END
6172; EG-NEXT:    Fetch clause starting at 8:
6173; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6174; EG-NEXT:    ALU clause starting at 10:
6175; EG-NEXT:     MOV * T7.X, KC0[2].Z,
6176; EG-NEXT:    ALU clause starting at 11:
6177; EG-NEXT:     LSHR * T8.Z, T7.W, literal.x,
6178; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6179; EG-NEXT:     AND_INT T8.X, T7.W, literal.x,
6180; EG-NEXT:     MOV T8.Y, 0.0,
6181; EG-NEXT:     LSHR T9.Z, T7.Z, literal.y,
6182; EG-NEXT:     AND_INT * T9.X, T7.Z, literal.x,
6183; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6184; EG-NEXT:     MOV T9.Y, 0.0,
6185; EG-NEXT:     LSHR * T10.Z, T7.Y, literal.x,
6186; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6187; EG-NEXT:     AND_INT T10.X, T7.Y, literal.x,
6188; EG-NEXT:     MOV T10.Y, 0.0,
6189; EG-NEXT:     LSHR T7.Z, T7.X, literal.y,
6190; EG-NEXT:     AND_INT * T7.X, T7.X, literal.x,
6191; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6192; EG-NEXT:     MOV T7.Y, 0.0,
6193; EG-NEXT:     MOV T8.W, 0.0,
6194; EG-NEXT:     MOV * T9.W, 0.0,
6195; EG-NEXT:     MOV T10.W, 0.0,
6196; EG-NEXT:     MOV * T7.W, 0.0,
6197; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
6198; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6199; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6200; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
6201; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6202; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6203; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
6204; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6205; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6206; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
6207; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6208;
6209; CM-LABEL: global_zextload_v8i16_to_v8i64:
6210; CM:       ; %bb.0:
6211; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6212; CM-NEXT:    TEX 0 @8
6213; CM-NEXT:    ALU 32, @11, KC0[CB0:0-32], KC1[]
6214; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T14.X
6215; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T9, T13.X
6216; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T10, T12.X
6217; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T11.X
6218; CM-NEXT:    CF_END
6219; CM-NEXT:    Fetch clause starting at 8:
6220; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6221; CM-NEXT:    ALU clause starting at 10:
6222; CM-NEXT:     MOV * T7.X, KC0[2].Z,
6223; CM-NEXT:    ALU clause starting at 11:
6224; CM-NEXT:     LSHR * T8.Z, T7.X, literal.x,
6225; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6226; CM-NEXT:     AND_INT T8.X, T7.X, literal.x,
6227; CM-NEXT:     MOV T8.Y, 0.0,
6228; CM-NEXT:     LSHR * T9.Z, T7.Y, literal.y,
6229; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6230; CM-NEXT:     AND_INT T9.X, T7.Y, literal.x,
6231; CM-NEXT:     MOV T9.Y, 0.0,
6232; CM-NEXT:     LSHR * T10.Z, T7.Z, literal.y,
6233; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6234; CM-NEXT:     AND_INT T10.X, T7.Z, literal.x,
6235; CM-NEXT:     MOV T10.Y, 0.0,
6236; CM-NEXT:     LSHR * T7.Z, T7.W, literal.y,
6237; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6238; CM-NEXT:     AND_INT T7.X, T7.W, literal.x,
6239; CM-NEXT:     MOV T7.Y, 0.0,
6240; CM-NEXT:     MOV * T8.W, 0.0,
6241; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
6242; CM-NEXT:     MOV * T9.W, 0.0,
6243; CM-NEXT:     MOV * T10.W, 0.0,
6244; CM-NEXT:     MOV * T7.W, 0.0,
6245; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6246; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6247; CM-NEXT:     LSHR T11.X, PV.W, literal.x,
6248; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6249; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6250; CM-NEXT:     LSHR T12.X, PV.W, literal.x,
6251; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6252; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6253; CM-NEXT:     LSHR * T13.X, PV.W, literal.x,
6254; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6255; CM-NEXT:     LSHR * T14.X, KC0[2].Y, literal.x,
6256; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: one vector load, one zext, one vector store.
6257  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
6258  %ext = zext <8 x i16> %load to <8 x i64>
6259  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
6260  ret void
6261}
6262
6263define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
6264; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i64:
6265; GCN-NOHSA-SI:       ; %bb.0:
6266; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
6267; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6268; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6269; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
6270; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
6271; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6272; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
6273; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
6274; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6275; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
6276; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
6277; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
6278; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v3
6279; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
6280; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
6281; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v0, 0, 16
6282; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[0:1], 48
6283; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v5, 0, 16
6284; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[14:15], v[2:3], 48
6285; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v1, 0, 16
6286; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v2, 0, 16
6287; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6288; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6289; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v6, 0, 16
6290; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v7, 0, 16
6291; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6292; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
6293; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
6294; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
6295; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
6296; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
6297; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6298; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6299; GCN-NOHSA-SI-NEXT:    s_endpgm
6300;
6301; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i64:
6302; GCN-HSA:       ; %bb.0:
6303; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6304; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6305; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6306; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6307; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6308; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6309; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6310; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
6311; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
6312; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6313; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6314; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
6315; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
6316; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
6317; GCN-HSA-NEXT:    s_add_u32 s0, s0, 32
6318; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
6319; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
6320; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
6321; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
6322; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
6323; GCN-HSA-NEXT:    v_bfe_i32 v4, v1, 0, 16
6324; GCN-HSA-NEXT:    v_ashr_i64 v[6:7], v[0:1], 48
6325; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6326; GCN-HSA-NEXT:    v_mov_b32_e32 v11, v3
6327; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6328; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
6329; GCN-HSA-NEXT:    v_bfe_i32 v8, v2, 0, 16
6330; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
6331; GCN-HSA-NEXT:    v_bfe_i32 v4, v0, 0, 16
6332; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[2:3], 48
6333; GCN-HSA-NEXT:    v_bfe_i32 v0, v11, 0, 16
6334; GCN-HSA-NEXT:    v_bfe_i32 v6, v1, 0, 16
6335; GCN-HSA-NEXT:    v_bfe_i32 v10, v10, 0, 16
6336; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6337; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6338; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6339; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
6340; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
6341; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[0:3]
6342; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6343; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
6344; GCN-HSA-NEXT:    s_endpgm
6345;
6346; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i64:
6347; GCN-NOHSA-VI:       ; %bb.0:
6348; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
6349; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
6350; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
6351; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
6352; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
6353; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6354; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
6355; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
6356; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6357; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
6358; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
6359; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
6360; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, v3
6361; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
6362; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
6363; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
6364; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v11, 0, 16
6365; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v3, 0, 16
6366; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
6367; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v2, 0, 16
6368; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v0, 0, 16
6369; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v1, 0, 16
6370; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v5, 0, 16
6371; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v7, 0, 16
6372; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v10, 0, 16
6373; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
6374; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
6375; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
6376; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6377; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6378; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
6379; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
6380; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
6381; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
6382; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
6383; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6384; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6385; GCN-NOHSA-VI-NEXT:    s_endpgm
6386;
6387; EG-LABEL: global_sextload_v8i16_to_v8i64:
6388; EG:       ; %bb.0:
6389; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6390; EG-NEXT:    TEX 0 @8
6391; EG-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
6392; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
6393; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
6394; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
6395; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
6396; EG-NEXT:    CF_END
6397; EG-NEXT:    Fetch clause starting at 8:
6398; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6399; EG-NEXT:    ALU clause starting at 10:
6400; EG-NEXT:     MOV * T7.X, KC0[2].Z,
6401; EG-NEXT:    ALU clause starting at 11:
6402; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
6403; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6404; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6405; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
6406; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
6407; EG-NEXT:     ASHR * T10.W, T7.X, literal.z,
6408; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6409; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6410; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
6411; EG-NEXT:     ASHR T10.Z, T7.X, literal.y,
6412; EG-NEXT:     ASHR * T12.W, T7.Y, literal.z,
6413; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6414; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6415; EG-NEXT:     BFE_INT T10.X, T7.X, 0.0, literal.x,
6416; EG-NEXT:     ASHR T12.Z, T7.Y, literal.x,
6417; EG-NEXT:     ASHR * T13.W, T7.Z, literal.y,
6418; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6419; EG-NEXT:     BFE_INT T12.X, T7.Y, 0.0, literal.x,
6420; EG-NEXT:     ASHR T10.Y, PV.X, literal.y,
6421; EG-NEXT:     ASHR T13.Z, T7.Z, literal.x,
6422; EG-NEXT:     ASHR * T14.W, T7.W, literal.y,
6423; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6424; EG-NEXT:     BFE_INT T13.X, T7.Z, 0.0, literal.x,
6425; EG-NEXT:     ASHR T12.Y, PV.X, literal.y,
6426; EG-NEXT:     ASHR * T14.Z, T7.W, literal.x,
6427; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6428; EG-NEXT:     BFE_INT T14.X, T7.W, 0.0, literal.x,
6429; EG-NEXT:     ASHR T13.Y, PV.X, literal.y,
6430; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
6431; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6432; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6433; EG-NEXT:     LSHR T7.X, PV.W, literal.x,
6434; EG-NEXT:     ASHR * T14.Y, PV.X, literal.y,
6435; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6436;
6437; CM-LABEL: global_sextload_v8i16_to_v8i64:
6438; CM:       ; %bb.0:
6439; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6440; CM-NEXT:    TEX 0 @8
6441; CM-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
6442; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T14.X
6443; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T11.X
6444; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T9.X
6445; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T10, T8.X
6446; CM-NEXT:    CF_END
6447; CM-NEXT:    Fetch clause starting at 8:
6448; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6449; CM-NEXT:    ALU clause starting at 10:
6450; CM-NEXT:     MOV * T7.X, KC0[2].Z,
6451; CM-NEXT:    ALU clause starting at 11:
6452; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6453; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6454; CM-NEXT:     LSHR T8.X, PV.W, literal.x,
6455; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6456; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6457; CM-NEXT:     LSHR T9.X, PV.W, literal.x,
6458; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
6459; CM-NEXT:     ASHR * T10.W, T7.W, literal.z,
6460; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6461; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6462; CM-NEXT:     LSHR T11.X, PV.Z, literal.x,
6463; CM-NEXT:     ASHR T10.Z, T7.W, literal.y,
6464; CM-NEXT:     ASHR * T12.W, T7.Z, literal.z,
6465; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6466; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6467; CM-NEXT:     BFE_INT T10.X, T7.W, 0.0, literal.x,
6468; CM-NEXT:     ASHR T12.Z, T7.Z, literal.x,
6469; CM-NEXT:     ASHR * T13.W, T7.Y, literal.y,
6470; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6471; CM-NEXT:     BFE_INT T12.X, T7.Z, 0.0, literal.x,
6472; CM-NEXT:     ASHR T10.Y, PV.X, literal.y,
6473; CM-NEXT:     ASHR T13.Z, T7.Y, literal.x,
6474; CM-NEXT:     ASHR * T7.W, T7.X, literal.y,
6475; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6476; CM-NEXT:     BFE_INT T13.X, T7.Y, 0.0, literal.x,
6477; CM-NEXT:     ASHR T12.Y, PV.X, literal.y,
6478; CM-NEXT:     ASHR * T7.Z, T7.X, literal.x,
6479; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6480; CM-NEXT:     BFE_INT T7.X, T7.X, 0.0, literal.x,
6481; CM-NEXT:     ASHR * T13.Y, PV.X, literal.y,
6482; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6483; CM-NEXT:     LSHR T14.X, KC0[2].Y, literal.x,
6484; CM-NEXT:     ASHR * T7.Y, PV.X, literal.y,
6485; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6486  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
6487  %ext = sext <8 x i16> %load to <8 x i64>
6488  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
6489  ret void
6490}
6491
; Kernel under test - loads a <16 x i16> vector through the addrspace(1)
; pointer %in, zero-extends every lane to i64, and stores the resulting
; <16 x i64> vector through the addrspace(1) pointer %out.
;
; The per-target assertion lines between the signature and the IR body are
; autogenerated (see the NOTE on the first line of this file). Regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i64:
; GCN-NOHSA-SI:       ; %bb.0:
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, 0xffff
; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, s0, v0
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, s0, v2
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, s0, v1
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s0, v3
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, 0
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v5
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v6
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v23, s0, v6
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s0, v4
; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v28, 16, v7
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, s0, v7
; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v19, s0, v5
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v29, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v24, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v20
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v20
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:80
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:112
; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(1)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, 0
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, 0
; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, 0
; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, 0
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:96
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64:
; GCN-HSA:       ; %bb.0:
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    v_mov_b32_e32 v8, 0
; GCN-HSA-NEXT:    s_mov_b32 s6, 0xffff
; GCN-HSA-NEXT:    v_mov_b32_e32 v10, v8
; GCN-HSA-NEXT:    v_mov_b32_e32 v12, v8
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s5
; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s4
; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x50
; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s3
; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s2
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
; GCN-HSA-NEXT:    v_and_b32_e32 v9, s6, v5
; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[9:12]
; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s5
; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s4
; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0x70
; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
; GCN-HSA-NEXT:    v_and_b32_e32 v9, s6, v1
; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[9:12]
; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s5
; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s3
; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s4
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
; GCN-HSA-NEXT:    v_and_b32_e32 v9, s6, v3
; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[9:12]
; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s2
; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
; GCN-HSA-NEXT:    v_and_b32_e32 v7, s6, v7
; GCN-HSA-NEXT:    v_mov_b32_e32 v14, 0
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v6
; GCN-HSA-NEXT:    v_and_b32_e32 v11, s6, v6
; GCN-HSA-NEXT:    flat_store_dwordx4 v[15:16], v[7:10]
; GCN-HSA-NEXT:    flat_store_dwordx4 v[17:18], v[11:14]
; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s0
; GCN-HSA-NEXT:    s_add_u32 s0, s0, 0x60
; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s3
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, v8
; GCN-HSA-NEXT:    v_mov_b32_e32 v7, v8
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v14
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
; GCN-HSA-NEXT:    v_and_b32_e32 v6, s6, v0
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
; GCN-HSA-NEXT:    v_and_b32_e32 v11, s6, v2
; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v4
; GCN-HSA-NEXT:    v_and_b32_e32 v0, s6, v4
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v9, v14
; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT:    flat_store_dwordx4 v[15:16], v[0:3]
; GCN-HSA-NEXT:    flat_store_dwordx4 v[17:18], v[6:9]
; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[11:14]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i64:
; GCN-NOHSA-VI:       ; %bb.0:
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, 0xffff
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v28, 0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v30, 0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v25, v28
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v21, v28
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v17, v28
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, v28
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, v28
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, 0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v15, 0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v19, 0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v23, 0
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, s0, v0
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, s0, v2
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, s0, v1
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, s0, v3
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v18, 16, v3
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, s0, v4
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, s0, v5
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v3, s0, v6
; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v27, s0, v7
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v29, 16, v7
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v22, 16, v4
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v26, 16, v5
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v6
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, v28
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, 0
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v27, 0
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v1
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:96
; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v28
; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_zextload_v16i16_to_v16i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @12
; EG-NEXT:    ALU 62, @17, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 12:
; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
; EG-NEXT:    ALU clause starting at 16:
; EG-NEXT:     MOV * T11.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 17:
; EG-NEXT:     LSHR * T13.Z, T12.W, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T13.X, T12.W, literal.x,
; EG-NEXT:     MOV T13.Y, 0.0,
; EG-NEXT:     LSHR T14.Z, T12.Z, literal.y,
; EG-NEXT:     AND_INT * T14.X, T12.Z, literal.x,
; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT:     MOV T14.Y, 0.0,
; EG-NEXT:     LSHR * T15.Z, T12.Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T15.X, T12.Y, literal.x,
; EG-NEXT:     MOV T15.Y, 0.0,
; EG-NEXT:     LSHR T12.Z, T12.X, literal.y,
; EG-NEXT:     AND_INT * T12.X, T12.X, literal.x,
; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT:     MOV T12.Y, 0.0,
; EG-NEXT:     LSHR * T16.Z, T11.W, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T16.X, T11.W, literal.x,
; EG-NEXT:     MOV T16.Y, 0.0,
; EG-NEXT:     LSHR T17.Z, T11.Z, literal.y,
; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.x,
; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT:     MOV T17.Y, 0.0,
; EG-NEXT:     LSHR * T18.Z, T11.Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:     AND_INT T18.X, T11.Y, literal.x,
; EG-NEXT:     MOV T18.Y, 0.0,
; EG-NEXT:     LSHR T11.Z, T11.X, literal.y,
; EG-NEXT:     AND_INT * T11.X, T11.X, literal.x,
; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT:     MOV T11.Y, 0.0,
; EG-NEXT:     MOV T13.W, 0.0,
; EG-NEXT:     MOV * T14.W, 0.0,
; EG-NEXT:     MOV T15.W, 0.0,
; EG-NEXT:     MOV * T12.W, 0.0,
; EG-NEXT:     MOV T16.W, 0.0,
; EG-NEXT:     MOV * T17.W, 0.0,
; EG-NEXT:     MOV T18.W, 0.0,
; EG-NEXT:     MOV * T11.W, 0.0,
; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_zextload_v16i16_to_v16i64:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 1 @12
; CM-NEXT:    ALU 64, @17, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T26.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T14, T25.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T24.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T23.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T16, T22.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T17, T21.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T18, T20.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T19.X
; CM-NEXT:    CF_END
; CM-NEXT:    Fetch clause starting at 12:
; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
; CM-NEXT:    ALU clause starting at 16:
; CM-NEXT:     MOV * T11.X, KC0[2].Z,
; CM-NEXT:    ALU clause starting at 17:
; CM-NEXT:     LSHR * T13.Z, T12.X, literal.x,
; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT:     AND_INT T13.X, T12.X, literal.x,
; CM-NEXT:     MOV T13.Y, 0.0,
; CM-NEXT:     LSHR * T14.Z, T12.Y, literal.y,
; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; CM-NEXT:     AND_INT T14.X, T12.Y, literal.x,
; CM-NEXT:     MOV T14.Y, 0.0,
; CM-NEXT:     LSHR * T15.Z, T12.Z, literal.y,
; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; CM-NEXT:     AND_INT T15.X, T12.Z, literal.x,
; CM-NEXT:     MOV T15.Y, 0.0,
; CM-NEXT:     LSHR * T12.Z, T12.W, literal.y,
; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; CM-NEXT:     AND_INT T12.X, T12.W, literal.x,
; CM-NEXT:     MOV T12.Y, 0.0,
; CM-NEXT:     LSHR * T16.Z, T11.X, literal.y,
; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; CM-NEXT:     AND_INT T16.X, T11.X, literal.x,
; CM-NEXT:     MOV T16.Y, 0.0,
; CM-NEXT:     LSHR * T17.Z, T11.Y, literal.y,
; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; CM-NEXT:     AND_INT T17.X, T11.Y, literal.x,
; CM-NEXT:     MOV T17.Y, 0.0,
; CM-NEXT:     LSHR * T18.Z, T11.Z, literal.y,
; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; CM-NEXT:     AND_INT T18.X, T11.Z, literal.x,
; CM-NEXT:     MOV T18.Y, 0.0,
; CM-NEXT:     LSHR * T11.Z, T11.W, literal.y,
; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; CM-NEXT:     AND_INT T11.X, T11.W, literal.x,
; CM-NEXT:     MOV T11.Y, 0.0,
; CM-NEXT:     MOV * T13.W, 0.0,
; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; CM-NEXT:     MOV * T14.W, 0.0,
; CM-NEXT:     MOV * T15.W, 0.0,
; CM-NEXT:     MOV * T12.W, 0.0,
; CM-NEXT:     MOV * T16.W, 0.0,
; CM-NEXT:     MOV * T17.W, 0.0,
; CM-NEXT:     MOV * T18.W, 0.0,
; CM-NEXT:     MOV * T11.W, 0.0,
; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
; CM-NEXT:     LSHR T19.X, PV.W, literal.x,
; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
; CM-NEXT:     LSHR T20.X, PV.W, literal.x,
; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
; CM-NEXT:     LSHR T21.X, PV.W, literal.x,
; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
; CM-NEXT:     LSHR T23.X, PV.W, literal.x,
; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
; CM-NEXT:     LSHR T24.X, PV.W, literal.x,
; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; CM-NEXT:     LSHR * T25.X, PV.W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:     LSHR * T26.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; IR under test - one vector load, one zext to i64 lanes, one vector store.
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}
6865
6866define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
6867; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i64:
6868; GCN-NOHSA-SI:       ; %bb.0:
6869; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
6870; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6871; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6872; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
6873; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
6874; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6875; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
6876; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
6877; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
6878; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
6879; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
6880; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
6881; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
6882; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, v7
6883; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, v3
6884; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v4
6885; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
6886; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v0
6887; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v8, 0, 16
6888; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[6:7], 48
6889; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6890; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
6891; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6892; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[9:10], v[4:5], 48
6893; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v7, v5, 0, 16
6894; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
6895; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80
6896; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v5, v0, 0, 16
6897; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6898; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v7, v15, 0, 16
6899; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v12, 0, 16
6900; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[2:3], 48
6901; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6902; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
6903; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6904; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[11:12], v[0:1], 48
6905; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v9, v1, 0, 16
6906; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v2, 0, 16
6907; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v14, 0, 16
6908; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v13, 0, 16
6909; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v4, 0, 16
6910; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
6911; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v6, 0, 16
6912; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v1, 0, 16
6913; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
6914; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6915; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
6916; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
6917; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
6918; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
6919; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
6920; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
6921; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
6922; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:16
6923; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:96
6924; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:64
6925; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6926; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[5:8], off, s[0:3], 0
6927; GCN-NOHSA-SI-NEXT:    s_endpgm
6928;
6929; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i64:
6930; GCN-HSA:       ; %bb.0:
6931; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
6932; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6933; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6934; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6935; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
6936; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
6937; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6938; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6939; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6940; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
6941; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6942; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6943; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
6944; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
6945; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6946; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6947; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
6948; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
6949; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
6950; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6951; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
6952; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
6953; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
6954; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6955; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
6956; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
6957; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
6958; GCN-HSA-NEXT:    v_bfe_i32 v8, v1, 0, 16
6959; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[0:1], 48
6960; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6961; GCN-HSA-NEXT:    v_mov_b32_e32 v1, v3
6962; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
6963; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
6964; GCN-HSA-NEXT:    v_bfe_i32 v8, v1, 0, 16
6965; GCN-HSA-NEXT:    v_ashr_i64 v[10:11], v[2:3], 48
6966; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
6967; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
6968; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6969; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
6970; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6971; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
6972; GCN-HSA-NEXT:    v_bfe_i32 v8, v2, 0, 16
6973; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
6974; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
6975; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
6976; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
6977; GCN-HSA-NEXT:    v_bfe_i32 v10, v1, 0, 16
6978; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
6979; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
6980; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6981; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
6982; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6983; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
6984; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
6985; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
6986; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
6987; GCN-HSA-NEXT:    v_bfe_i32 v0, v5, 0, 16
6988; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[4:5], 48
6989; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
6990; GCN-HSA-NEXT:    s_add_u32 s0, s0, 64
6991; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
6992; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
6993; GCN-HSA-NEXT:    v_mov_b32_e32 v11, v7
6994; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
6995; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
6996; GCN-HSA-NEXT:    v_bfe_i32 v10, v8, 0, 16
6997; GCN-HSA-NEXT:    v_bfe_i32 v8, v6, 0, 16
6998; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
6999; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
7000; GCN-HSA-NEXT:    v_bfe_i32 v4, v11, 0, 16
7001; GCN-HSA-NEXT:    v_ashr_i64 v[6:7], v[6:7], 48
7002; GCN-HSA-NEXT:    v_bfe_i32 v2, v1, 0, 16
7003; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
7004; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s1
7005; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
7006; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
7007; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s0
7008; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
7009; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
7010; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
7011; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
7012; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[4:7]
7013; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
7014; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[0:3]
7015; GCN-HSA-NEXT:    s_endpgm
7016;
7017; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i64:
7018; GCN-NOHSA-VI:       ; %bb.0:
7019; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
7020; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
7021; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
7022; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
7023; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
7024; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7025; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
7026; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
7027; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
7028; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
7029; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
7030; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
7031; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
7032; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v0, 0, 16
7033; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
7034; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, v7
7035; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
7036; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v9, v9, 0, 16
7037; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v7, 0, 16
7038; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
7039; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
7040; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:112
7041; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
7042; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v9, v6, 0, 16
7043; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
7044; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v6, 0, 16
7045; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
7046; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
7047; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
7048; GCN-NOHSA-VI-NEXT:    s_nop 0
7049; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v9, v5, 0, 16
7050; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
7051; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v5, 0, 16
7052; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
7053; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
7054; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:80
7055; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
7056; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v1, 0, 16
7057; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7058; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v1, 0, 16
7059; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, v3
7060; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v1, 0, 16
7061; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
7062; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
7063; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v1, 0, 16
7064; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v4, v4, 0, 16
7065; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v0, 0, 16
7066; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v0, v2, 0, 16
7067; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v5, 0, 16
7068; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v3, 0, 16
7069; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
7070; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
7071; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
7072; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
7073; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
7074; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
7075; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
7076; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
7077; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
7078; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
7079; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
7080; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
7081; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7082; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
7083; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
7084; GCN-NOHSA-VI-NEXT:    s_endpgm
7085;
7086; EG-LABEL: global_sextload_v16i16_to_v16i64:
7087; EG:       ; %bb.0:
7088; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
7089; EG-NEXT:    TEX 1 @12
7090; EG-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
7091; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
7092; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
7093; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
7094; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
7095; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
7096; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
7097; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
7098; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
7099; EG-NEXT:    CF_END
7100; EG-NEXT:    Fetch clause starting at 12:
7101; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
7102; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
7103; EG-NEXT:    ALU clause starting at 16:
7104; EG-NEXT:     MOV * T11.X, KC0[2].Z,
7105; EG-NEXT:    ALU clause starting at 17:
7106; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
7107; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7108; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7109; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
7110; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7111; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7112; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
7113; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7114; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7115; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
7116; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7117; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7118; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
7119; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7120; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7121; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
7122; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
7123; EG-NEXT:     ASHR * T19.W, T11.X, literal.z,
7124; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7125; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7126; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
7127; EG-NEXT:     ASHR T19.Z, T11.X, literal.y,
7128; EG-NEXT:     ASHR * T21.W, T11.Y, literal.z,
7129; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7130; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7131; EG-NEXT:     BFE_INT T19.X, T11.X, 0.0, literal.x,
7132; EG-NEXT:     ASHR T21.Z, T11.Y, literal.x,
7133; EG-NEXT:     ASHR * T22.W, T11.Z, literal.y,
7134; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7135; EG-NEXT:     BFE_INT T21.X, T11.Y, 0.0, literal.x,
7136; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
7137; EG-NEXT:     ASHR T22.Z, T11.Z, literal.x,
7138; EG-NEXT:     ASHR * T23.W, T11.W, literal.y,
7139; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7140; EG-NEXT:     BFE_INT T22.X, T11.Z, 0.0, literal.x,
7141; EG-NEXT:     ASHR T21.Y, PV.X, literal.y,
7142; EG-NEXT:     ASHR T23.Z, T11.W, literal.x,
7143; EG-NEXT:     ASHR * T24.W, T12.X, literal.y,
7144; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7145; EG-NEXT:     BFE_INT T23.X, T11.W, 0.0, literal.x,
7146; EG-NEXT:     ASHR T22.Y, PV.X, literal.y,
7147; EG-NEXT:     ASHR T24.Z, T12.X, literal.x,
7148; EG-NEXT:     ASHR * T11.W, T12.Y, literal.y,
7149; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7150; EG-NEXT:     BFE_INT T24.X, T12.X, 0.0, literal.x,
7151; EG-NEXT:     ASHR T23.Y, PV.X, literal.y,
7152; EG-NEXT:     ASHR T11.Z, T12.Y, literal.x,
7153; EG-NEXT:     ASHR * T25.W, T12.Z, literal.y,
7154; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7155; EG-NEXT:     BFE_INT T11.X, T12.Y, 0.0, literal.x,
7156; EG-NEXT:     ASHR T24.Y, PV.X, literal.y,
7157; EG-NEXT:     ASHR T25.Z, T12.Z, literal.x,
7158; EG-NEXT:     ASHR * T26.W, T12.W, literal.y,
7159; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7160; EG-NEXT:     BFE_INT T25.X, T12.Z, 0.0, literal.x,
7161; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
7162; EG-NEXT:     ASHR * T26.Z, T12.W, literal.x,
7163; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7164; EG-NEXT:     BFE_INT T26.X, T12.W, 0.0, literal.x,
7165; EG-NEXT:     ASHR T25.Y, PV.X, literal.y,
7166; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
7167; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7168; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
7169; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
7170; EG-NEXT:     ASHR * T26.Y, PV.X, literal.y,
7171; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7172;
7173; CM-LABEL: global_sextload_v16i16_to_v16i64:
7174; CM:       ; %bb.0:
7175; CM-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
7176; CM-NEXT:    TEX 1 @12
7177; CM-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
7178; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T26.X
7179; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T20.X
7180; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T24, T18.X
7181; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T17.X
7182; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T16.X
7183; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T22, T15.X
7184; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T21, T14.X
7185; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T13.X
7186; CM-NEXT:    CF_END
7187; CM-NEXT:    Fetch clause starting at 12:
7188; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
7189; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
7190; CM-NEXT:    ALU clause starting at 16:
7191; CM-NEXT:     MOV * T11.X, KC0[2].Z,
7192; CM-NEXT:    ALU clause starting at 17:
7193; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7194; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
7195; CM-NEXT:     LSHR T13.X, PV.W, literal.x,
7196; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7197; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7198; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
7199; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7200; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7201; CM-NEXT:     LSHR T15.X, PV.W, literal.x,
7202; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7203; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7204; CM-NEXT:     LSHR T16.X, PV.W, literal.x,
7205; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7206; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7207; CM-NEXT:     LSHR T17.X, PV.W, literal.x,
7208; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7209; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7210; CM-NEXT:     LSHR T18.X, PV.W, literal.x,
7211; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
7212; CM-NEXT:     ASHR * T19.W, T11.W, literal.z,
7213; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7214; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7215; CM-NEXT:     LSHR T20.X, PV.Z, literal.x,
7216; CM-NEXT:     ASHR T19.Z, T11.W, literal.y,
7217; CM-NEXT:     ASHR * T21.W, T11.Z, literal.z,
7218; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7219; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7220; CM-NEXT:     BFE_INT T19.X, T11.W, 0.0, literal.x,
7221; CM-NEXT:     ASHR T21.Z, T11.Z, literal.x,
7222; CM-NEXT:     ASHR * T22.W, T11.Y, literal.y,
7223; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7224; CM-NEXT:     BFE_INT T21.X, T11.Z, 0.0, literal.x,
7225; CM-NEXT:     ASHR T19.Y, PV.X, literal.y,
7226; CM-NEXT:     ASHR T22.Z, T11.Y, literal.x,
7227; CM-NEXT:     ASHR * T11.W, T11.X, literal.y,
7228; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7229; CM-NEXT:     BFE_INT T22.X, T11.Y, 0.0, literal.x,
7230; CM-NEXT:     ASHR T21.Y, PV.X, literal.y,
7231; CM-NEXT:     ASHR T11.Z, T11.X, literal.x,
7232; CM-NEXT:     ASHR * T23.W, T12.W, literal.y,
7233; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7234; CM-NEXT:     BFE_INT T11.X, T11.X, 0.0, literal.x,
7235; CM-NEXT:     ASHR T22.Y, PV.X, literal.y,
7236; CM-NEXT:     ASHR T23.Z, T12.W, literal.x,
7237; CM-NEXT:     ASHR * T24.W, T12.Z, literal.y,
7238; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7239; CM-NEXT:     BFE_INT T23.X, T12.W, 0.0, literal.x,
7240; CM-NEXT:     ASHR T11.Y, PV.X, literal.y,
7241; CM-NEXT:     ASHR T24.Z, T12.Z, literal.x,
7242; CM-NEXT:     ASHR * T25.W, T12.Y, literal.y,
7243; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7244; CM-NEXT:     BFE_INT T24.X, T12.Z, 0.0, literal.x,
7245; CM-NEXT:     ASHR T23.Y, PV.X, literal.y,
7246; CM-NEXT:     ASHR T25.Z, T12.Y, literal.x,
7247; CM-NEXT:     ASHR * T12.W, T12.X, literal.y,
7248; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7249; CM-NEXT:     BFE_INT T25.X, T12.Y, 0.0, literal.x,
7250; CM-NEXT:     ASHR T24.Y, PV.X, literal.y,
7251; CM-NEXT:     ASHR * T12.Z, T12.X, literal.x,
7252; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7253; CM-NEXT:     BFE_INT T12.X, T12.X, 0.0, literal.x,
7254; CM-NEXT:     ASHR * T25.Y, PV.X, literal.y,
7255; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7256; CM-NEXT:     LSHR T26.X, KC0[2].Y, literal.x,
7257; CM-NEXT:     ASHR * T12.Y, PV.X, literal.y,
7258; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7259  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
7260  %ext = sext <16 x i16> %load to <16 x i64>
7261  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
7262  ret void
7263}
7264
7265define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
7266; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i64:
7267; GCN-NOHSA-SI:       ; %bb.0:
7268; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
7269; GCN-NOHSA-SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
7270; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, -1
7271; GCN-NOHSA-SI-NEXT:    s_mov_b32 s15, 0xe8f000
7272; GCN-NOHSA-SI-NEXT:    s_add_u32 s12, s12, s3
7273; GCN-NOHSA-SI-NEXT:    s_addc_u32 s13, s13, 0
7274; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
7275; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
7276; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
7277; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, 0xffff
7278; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
7279; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
7280; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
7281; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
7282; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
7283; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
7284; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[2:5], off, s[8:11], 0
7285; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[6:9], off, s[8:11], 0 offset:16
7286; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[10:13], off, s[8:11], 0 offset:32
7287; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[14:17], off, s[8:11], 0 offset:48
7288; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
7289; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v23, 16, v3
7290; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v4
7291; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v2
7292; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, s0, v2
7293; GCN-NOHSA-SI-NEXT:    buffer_store_dword v18, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
7294; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7295; GCN-NOHSA-SI-NEXT:    buffer_store_dword v19, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
7296; GCN-NOHSA-SI-NEXT:    buffer_store_dword v20, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
7297; GCN-NOHSA-SI-NEXT:    buffer_store_dword v21, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
7298; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(2)
7299; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v19, s0, v4
7300; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7301; GCN-NOHSA-SI-NEXT:    buffer_store_dword v19, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
7302; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7303; GCN-NOHSA-SI-NEXT:    buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
7304; GCN-NOHSA-SI-NEXT:    buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
7305; GCN-NOHSA-SI-NEXT:    buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
7306; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s0, v3
7307; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v32, 16, v5
7308; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v30, s0, v5
7309; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v36, 16, v6
7310; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v34, s0, v6
7311; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v28, 16, v8
7312; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v26, s0, v8
7313; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v40, 16, v7
7314; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v38, s0, v7
7315; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v44, 16, v9
7316; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v42, s0, v9
7317; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v48, 16, v10
7318; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v46, s0, v10
7319; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v24, 16, v12
7320; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7321; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v22, s0, v12
7322; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v52, 16, v11
7323; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v50, s0, v11
7324; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v56, 16, v13
7325; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v54, s0, v13
7326; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v17
7327; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v60, 16, v14
7328; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v58, s0, v14
7329; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v16
7330; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, s0, v16
7331; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v15
7332; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, s0, v15
7333; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, s0, v17
7334; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
7335; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, v1
7336; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v1
7337; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v55, v1
7338; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v57, v1
7339; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v51, v1
7340; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v53, v1
7341; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v43, v1
7342; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v45, v1
7343; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v39, v1
7344; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v41, v1
7345; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v31, v1
7346; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v33, v1
7347; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, v23
7348; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v1
7349; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v1
7350; GCN-NOHSA-SI-NEXT:    buffer_store_dword v4, off, s[12:15], 0 offset:36 ; 4-byte Folded Spill
7351; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7352; GCN-NOHSA-SI-NEXT:    buffer_store_dword v5, off, s[12:15], 0 offset:40 ; 4-byte Folded Spill
7353; GCN-NOHSA-SI-NEXT:    buffer_store_dword v6, off, s[12:15], 0 offset:44 ; 4-byte Folded Spill
7354; GCN-NOHSA-SI-NEXT:    buffer_store_dword v7, off, s[12:15], 0 offset:48 ; 4-byte Folded Spill
7355; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v1
7356; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v59, v1
7357; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, v1
7358; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v47, v1
7359; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, v1
7360; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v35, v1
7361; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7362; GCN-NOHSA-SI-NEXT:    buffer_load_dword v4, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
7363; GCN-NOHSA-SI-NEXT:    buffer_load_dword v5, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
7364; GCN-NOHSA-SI-NEXT:    buffer_load_dword v6, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
7365; GCN-NOHSA-SI-NEXT:    buffer_load_dword v7, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload
7366; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7367; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v1
7368; GCN-NOHSA-SI-NEXT:    buffer_load_dword v12, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7369; GCN-NOHSA-SI-NEXT:    buffer_load_dword v13, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
7370; GCN-NOHSA-SI-NEXT:    buffer_load_dword v14, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
7371; GCN-NOHSA-SI-NEXT:    buffer_load_dword v15, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
7372; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7373; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v1
7374; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
7375; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
7376; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
7377; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, 0
7378; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v61, 0
7379; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, 0
7380; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7381; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, v12
7382; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v13
7383; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, v14
7384; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, 0
7385; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, 0
7386; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v37, 0
7387; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v29, 0
7388; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v49, 0
7389; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
7390; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:176
7391; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:144
7392; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:112
7393; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:80
7394; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:48
7395; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
7396; GCN-NOHSA-SI-NEXT:    buffer_load_dword v8, off, s[12:15], 0 offset:36 ; 4-byte Folded Reload
7397; GCN-NOHSA-SI-NEXT:    buffer_load_dword v9, off, s[12:15], 0 offset:40 ; 4-byte Folded Reload
7398; GCN-NOHSA-SI-NEXT:    buffer_load_dword v10, off, s[12:15], 0 offset:44 ; 4-byte Folded Reload
7399; GCN-NOHSA-SI-NEXT:    buffer_load_dword v11, off, s[12:15], 0 offset:48 ; 4-byte Folded Reload
7400; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
7401; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
7402; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224
7403; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:192
7404; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:160
7405; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[46:49], off, s[0:3], 0 offset:128
7406; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:96
7407; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:64
7408; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
7409; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7410; GCN-NOHSA-SI-NEXT:    s_endpgm
7411;
7412; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64:
7413; GCN-HSA:       ; %bb.0:
7414; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
7415; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
7416; GCN-HSA-NEXT:    s_mov_b32 s16, 0xffff
7417; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
7418; GCN-HSA-NEXT:    v_mov_b32_e32 v5, v1
7419; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7420; GCN-HSA-NEXT:    s_add_u32 s4, s2, 16
7421; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
7422; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s5
7423; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s4
7424; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
7425; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
7426; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s5
7427; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s4
7428; GCN-HSA-NEXT:    flat_load_dwordx4 v[6:9], v[6:7]
7429; GCN-HSA-NEXT:    flat_load_dwordx4 v[10:13], v[10:11]
7430; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
7431; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
7432; GCN-HSA-NEXT:    s_add_u32 s2, s2, 48
7433; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
7434; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
7435; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
7436; GCN-HSA-NEXT:    flat_load_dwordx4 v[14:17], v[14:15]
7437; GCN-HSA-NEXT:    flat_load_dwordx4 v[18:21], v[18:19]
7438; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
7439; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7440; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
7441; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
7442; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xf0
7443; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
7444; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0xd0
7445; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
7446; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0xb0
7447; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
7448; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0x90
7449; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
7450; GCN-HSA-NEXT:    s_add_u32 s14, s0, 0x70
7451; GCN-HSA-NEXT:    s_addc_u32 s15, s1, 0
7452; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s15
7453; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s14
7454; GCN-HSA-NEXT:    s_add_u32 s14, s0, 0x50
7455; GCN-HSA-NEXT:    s_addc_u32 s15, s1, 0
7456; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
7457; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v9
7458; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v9
7459; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[2:5]
7460; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s15
7461; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v7
7462; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v7
7463; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s14
7464; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[2:5]
7465; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s11
7466; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
7467; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v13
7468; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v13
7469; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s10
7470; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[2:5]
7471; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s13
7472; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v11
7473; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v11
7474; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s12
7475; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[2:5]
7476; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s5
7477; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s4
7478; GCN-HSA-NEXT:    s_waitcnt vmcnt(4)
7479; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v19
7480; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v19
7481; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[2:5]
7482; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s7
7483; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v17
7484; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v17
7485; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s6
7486; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[2:5]
7487; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
7488; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s9
7489; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
7490; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v15
7491; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v15
7492; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s8
7493; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[2:5]
7494; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xe0
7495; GCN-HSA-NEXT:    v_mov_b32_e32 v7, 0
7496; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v18
7497; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v18
7498; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s1
7499; GCN-HSA-NEXT:    v_mov_b32_e32 v5, v7
7500; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s0
7501; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
7502; GCN-HSA-NEXT:    flat_store_dwordx4 v[17:18], v[2:5]
7503; GCN-HSA-NEXT:    v_and_b32_e32 v0, s16, v21
7504; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v16
7505; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v16
7506; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s7
7507; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s6
7508; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xc0
7509; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
7510; GCN-HSA-NEXT:    flat_store_dwordx4 v[15:16], v[2:5]
7511; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s3
7512; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v14
7513; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v14
7514; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s7
7515; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s6
7516; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xa0
7517; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
7518; GCN-HSA-NEXT:    flat_store_dwordx4 v[13:14], v[2:5]
7519; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s2
7520; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v12
7521; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v12
7522; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s7
7523; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s6
7524; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0x80
7525; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
7526; GCN-HSA-NEXT:    flat_store_dwordx4 v[11:12], v[2:5]
7527; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
7528; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v4, 16, v10
7529; GCN-HSA-NEXT:    v_and_b32_e32 v2, s16, v10
7530; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s7
7531; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s6
7532; GCN-HSA-NEXT:    flat_store_dwordx4 v[9:10], v[2:5]
7533; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
7534; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v21
7535; GCN-HSA-NEXT:    flat_store_dwordx4 v[15:16], v[0:3]
7536; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
7537; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
7538; GCN-HSA-NEXT:    v_and_b32_e32 v9, s16, v6
7539; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v6, 16, v20
7540; GCN-HSA-NEXT:    v_and_b32_e32 v4, s16, v20
7541; GCN-HSA-NEXT:    v_mov_b32_e32 v5, v1
7542; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
7543; GCN-HSA-NEXT:    s_add_u32 s0, s0, 64
7544; GCN-HSA-NEXT:    flat_store_dwordx4 v[2:3], v[4:7]
7545; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
7546; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v14, 16, v8
7547; GCN-HSA-NEXT:    v_and_b32_e32 v12, s16, v8
7548; GCN-HSA-NEXT:    v_mov_b32_e32 v13, v1
7549; GCN-HSA-NEXT:    v_mov_b32_e32 v15, v7
7550; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
7551; GCN-HSA-NEXT:    v_mov_b32_e32 v10, v1
7552; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
7553; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
7554; GCN-HSA-NEXT:    flat_store_dwordx4 v[2:3], v[12:15]
7555; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
7556; GCN-HSA-NEXT:    v_mov_b32_e32 v12, v7
7557; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[9:12]
7558; GCN-HSA-NEXT:    s_endpgm
7559;
7560; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i64:
7561; GCN-NOHSA-VI:       ; %bb.0:
7562; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
7563; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
7564; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
7565; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s2
7566; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s3
7567; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7568; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s6
7569; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s7
7570; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
7571; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
7572; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:32
7573; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:48
7574; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, 0xffff
7575; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v57, 0
7576; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
7577; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, 0
7578; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v15, 0
7579; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v27, 0
7580; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v23, 0
7581; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v19, 0
7582; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
7583; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v8, s0, v1
7584; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
7585; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
7586; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v48, s0, v36
7587; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v50, 16, v36
7588; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v54, s0, v38
7589; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v56, 16, v38
7590; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v36, s0, v37
7591; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v38, 16, v37
7592; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v37, 0
7593; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v1, s0, v3
7594; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v12, s0, v0
7595; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v16, s0, v2
7596; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v20, s0, v5
7597; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v24, s0, v4
7598; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v26, 16, v4
7599; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v4, s0, v6
7600; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v28, s0, v7
7601; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v39, s0, v32
7602; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v41, 16, v32
7603; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v32, s0, v34
7604; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v42, s0, v31
7605; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v45, s0, v33
7606; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v51, s0, v35
7607; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v55, v37
7608; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
7609; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v53, 16, v35
7610; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:240
7611; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v52, v37
7612; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v54, 0
7613; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:192
7614; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v49, v37
7615; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v51, 0
7616; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v47, 16, v33
7617; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:208
7618; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v46, v37
7619; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v48, 0
7620; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v22, 16, v5
7621; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v30, 16, v7
7622; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v44, 16, v31
7623; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[45:48], off, s[0:3], 0 offset:160
7624; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
7625; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v45, 0
7626; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v43, v37
7627; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, 0
7628; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, v37
7629; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v18, 16, v2
7630; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:128
7631; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
7632; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v0
7633; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
7634; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v34, 16, v34
7635; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v35, 0
7636; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v33, v37
7637; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v42, 0
7638; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v40, v37
7639; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v31, 0
7640; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, 0
7641; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v29, v37
7642; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, v37
7643; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, v37
7644; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, v37
7645; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[39:42], off, s[0:3], 0 offset:144
7646; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:176
7647; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v39, 0
7648; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v25, v37
7649; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v21, v37
7650; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v17, v37
7651; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112
7652; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:64
7653; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
7654; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:224
7655; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
7656; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[1:4], off, s[0:3], 0 offset:48
7657; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
7658; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
7659; GCN-NOHSA-VI-NEXT:    s_endpgm
7660;
7661; EG-LABEL: global_zextload_v32i16_to_v32i64:
7662; EG:       ; %bb.0:
7663; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7664; EG-NEXT:    TEX 2 @22
7665; EG-NEXT:    ALU 33, @31, KC0[], KC1[]
7666; EG-NEXT:    TEX 0 @28
7667; EG-NEXT:    ALU 93, @65, KC0[CB0:0-32], KC1[]
7668; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
7669; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
7670; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
7671; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
7672; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
7673; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
7674; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
7675; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
7676; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
7677; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
7678; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
7679; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
7680; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
7681; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
7682; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
7683; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
7684; EG-NEXT:    CF_END
7685; EG-NEXT:    Fetch clause starting at 22:
7686; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
7687; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 16, #1
7688; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
7689; EG-NEXT:    Fetch clause starting at 28:
7690; EG-NEXT:     VTX_READ_128 T29.XYZW, T19.X, 0, #1
7691; EG-NEXT:    ALU clause starting at 30:
7692; EG-NEXT:     MOV * T19.X, KC0[2].Z,
7693; EG-NEXT:    ALU clause starting at 31:
7694; EG-NEXT:     LSHR * T23.Z, T20.Z, literal.x,
7695; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7696; EG-NEXT:     AND_INT T23.X, T20.Z, literal.x,
7697; EG-NEXT:     MOV T23.Y, 0.0,
7698; EG-NEXT:     LSHR T24.Z, T20.W, literal.y,
7699; EG-NEXT:     AND_INT * T24.X, T20.W, literal.x,
7700; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7701; EG-NEXT:     MOV T24.Y, 0.0,
7702; EG-NEXT:     LSHR * T25.Z, T20.X, literal.x,
7703; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7704; EG-NEXT:     AND_INT T25.X, T20.X, literal.x,
7705; EG-NEXT:     MOV T25.Y, 0.0,
7706; EG-NEXT:     LSHR T20.Z, T20.Y, literal.y,
7707; EG-NEXT:     AND_INT * T20.X, T20.Y, literal.x,
7708; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7709; EG-NEXT:     MOV T20.Y, 0.0,
7710; EG-NEXT:     LSHR * T26.Z, T22.Z, literal.x,
7711; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7712; EG-NEXT:     AND_INT T26.X, T22.Z, literal.x,
7713; EG-NEXT:     MOV T26.Y, 0.0,
7714; EG-NEXT:     LSHR T27.Z, T22.W, literal.y,
7715; EG-NEXT:     AND_INT * T27.X, T22.W, literal.x,
7716; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7717; EG-NEXT:     MOV T27.Y, 0.0,
7718; EG-NEXT:     LSHR * T28.Z, T22.X, literal.x,
7719; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7720; EG-NEXT:     AND_INT T28.X, T22.X, literal.x,
7721; EG-NEXT:     MOV T28.Y, 0.0,
7722; EG-NEXT:     LSHR T22.Z, T22.Y, literal.y,
7723; EG-NEXT:     AND_INT * T22.X, T22.Y, literal.x,
7724; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7725; EG-NEXT:     MOV T22.Y, 0.0,
7726; EG-NEXT:     LSHR * T19.Z, T21.Z, literal.x,
7727; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7728; EG-NEXT:    ALU clause starting at 65:
7729; EG-NEXT:     AND_INT T19.X, T21.Z, literal.x,
7730; EG-NEXT:     MOV T19.Y, 0.0,
7731; EG-NEXT:     LSHR T30.Z, T21.W, literal.y,
7732; EG-NEXT:     AND_INT * T30.X, T21.W, literal.x,
7733; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7734; EG-NEXT:     MOV T30.Y, 0.0,
7735; EG-NEXT:     LSHR * T31.Z, T21.X, literal.x,
7736; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7737; EG-NEXT:     AND_INT T31.X, T21.X, literal.x,
7738; EG-NEXT:     MOV T31.Y, 0.0,
7739; EG-NEXT:     LSHR T21.Z, T21.Y, literal.y,
7740; EG-NEXT:     AND_INT * T21.X, T21.Y, literal.x,
7741; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7742; EG-NEXT:     MOV T21.Y, 0.0,
7743; EG-NEXT:     LSHR * T32.Z, T29.Z, literal.x,
7744; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7745; EG-NEXT:     AND_INT T32.X, T29.Z, literal.x,
7746; EG-NEXT:     MOV T32.Y, 0.0,
7747; EG-NEXT:     LSHR T33.Z, T29.W, literal.y,
7748; EG-NEXT:     AND_INT * T33.X, T29.W, literal.x,
7749; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7750; EG-NEXT:     MOV T33.Y, 0.0,
7751; EG-NEXT:     LSHR * T34.Z, T29.X, literal.x,
7752; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7753; EG-NEXT:     AND_INT T34.X, T29.X, literal.x,
7754; EG-NEXT:     MOV T34.Y, 0.0,
7755; EG-NEXT:     LSHR T29.Z, T29.Y, literal.y,
7756; EG-NEXT:     AND_INT * T29.X, T29.Y, literal.x,
7757; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7758; EG-NEXT:     MOV T29.Y, 0.0,
7759; EG-NEXT:     MOV T23.W, 0.0,
7760; EG-NEXT:     MOV * T24.W, 0.0,
7761; EG-NEXT:     MOV T25.W, 0.0,
7762; EG-NEXT:     MOV * T20.W, 0.0,
7763; EG-NEXT:     MOV T26.W, 0.0,
7764; EG-NEXT:     MOV * T27.W, 0.0,
7765; EG-NEXT:     MOV T28.W, 0.0,
7766; EG-NEXT:     MOV * T22.W, 0.0,
7767; EG-NEXT:     MOV T19.W, 0.0,
7768; EG-NEXT:     MOV * T30.W, 0.0,
7769; EG-NEXT:     MOV T31.W, 0.0,
7770; EG-NEXT:     MOV * T21.W, 0.0,
7771; EG-NEXT:     MOV T32.W, 0.0,
7772; EG-NEXT:     MOV * T33.W, 0.0,
7773; EG-NEXT:     MOV T34.W, 0.0,
7774; EG-NEXT:     MOV * T29.W, 0.0,
7775; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7776; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7777; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
7778; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.x,
7779; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7780; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7781; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
7782; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
7783; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7784; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7785; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
7786; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7787; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7788; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
7789; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7790; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7791; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
7792; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7793; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7794; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
7795; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7796; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7797; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
7798; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7799; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7800; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
7801; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7802; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7803; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
7804; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7805; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7806; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
7807; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7808; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7809; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
7810; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7811; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7812; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
7813; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7814; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7815; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
7816; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7817; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
7818; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
7819; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7820; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
7821; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
7822; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7823;
7824; CM-LABEL: global_zextload_v32i16_to_v32i64:
7825; CM:       ; %bb.0:
7826; CM-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7827; CM-NEXT:    TEX 2 @22
7828; CM-NEXT:    ALU 33, @31, KC0[], KC1[]
7829; CM-NEXT:    TEX 0 @28
7830; CM-NEXT:    ALU 94, @65, KC0[CB0:0-32], KC1[]
7831; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T50.X
7832; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T24, T49.X
7833; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T48.X
7834; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T26, T47.X
7835; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T20, T46.X
7836; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T27, T45.X
7837; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T28, T44.X
7838; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T29, T43.X
7839; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T42.X
7840; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T30, T41.X
7841; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T40.X
7842; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T32, T39.X
7843; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T21, T38.X
7844; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T33, T37.X
7845; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T34, T36.X
7846; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T22.X
7847; CM-NEXT:    CF_END
7848; CM-NEXT:    Fetch clause starting at 22:
7849; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
7850; CM-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 32, #1
7851; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 16, #1
7852; CM-NEXT:    Fetch clause starting at 28:
7853; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 48, #1
7854; CM-NEXT:    ALU clause starting at 30:
7855; CM-NEXT:     MOV * T19.X, KC0[2].Z,
7856; CM-NEXT:    ALU clause starting at 31:
7857; CM-NEXT:     LSHR * T23.Z, T20.Y, literal.x,
7858; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7859; CM-NEXT:     AND_INT T23.X, T20.Y, literal.x,
7860; CM-NEXT:     MOV T23.Y, 0.0,
7861; CM-NEXT:     LSHR * T24.Z, T20.X, literal.y,
7862; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7863; CM-NEXT:     AND_INT T24.X, T20.X, literal.x,
7864; CM-NEXT:     MOV T24.Y, 0.0,
7865; CM-NEXT:     LSHR * T25.Z, T20.W, literal.y,
7866; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7867; CM-NEXT:     AND_INT T25.X, T20.W, literal.x,
7868; CM-NEXT:     MOV T25.Y, 0.0,
7869; CM-NEXT:     LSHR * T26.Z, T20.Z, literal.y,
7870; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7871; CM-NEXT:     AND_INT T26.X, T20.Z, literal.x,
7872; CM-NEXT:     MOV T26.Y, 0.0,
7873; CM-NEXT:     LSHR * T20.Z, T22.Y, literal.y,
7874; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7875; CM-NEXT:     AND_INT T20.X, T22.Y, literal.x,
7876; CM-NEXT:     MOV T20.Y, 0.0,
7877; CM-NEXT:     LSHR * T27.Z, T22.X, literal.y,
7878; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7879; CM-NEXT:     AND_INT T27.X, T22.X, literal.x,
7880; CM-NEXT:     MOV T27.Y, 0.0,
7881; CM-NEXT:     LSHR * T28.Z, T22.W, literal.y,
7882; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7883; CM-NEXT:     AND_INT T28.X, T22.W, literal.x,
7884; CM-NEXT:     MOV T28.Y, 0.0,
7885; CM-NEXT:     LSHR * T29.Z, T22.Z, literal.y,
7886; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7887; CM-NEXT:     AND_INT T29.X, T22.Z, literal.x,
7888; CM-NEXT:     MOV T29.Y, 0.0,
7889; CM-NEXT:     LSHR * T19.Z, T21.Y, literal.y,
7890; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7891; CM-NEXT:    ALU clause starting at 65:
7892; CM-NEXT:     AND_INT T19.X, T21.Y, literal.x,
7893; CM-NEXT:     MOV T19.Y, 0.0,
7894; CM-NEXT:     LSHR * T30.Z, T21.X, literal.y,
7895; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7896; CM-NEXT:     AND_INT T30.X, T21.X, literal.x,
7897; CM-NEXT:     MOV T30.Y, 0.0,
7898; CM-NEXT:     LSHR * T31.Z, T21.W, literal.y,
7899; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7900; CM-NEXT:     AND_INT T31.X, T21.W, literal.x,
7901; CM-NEXT:     MOV T31.Y, 0.0,
7902; CM-NEXT:     LSHR * T32.Z, T21.Z, literal.y,
7903; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7904; CM-NEXT:     AND_INT T32.X, T21.Z, literal.x,
7905; CM-NEXT:     MOV T32.Y, 0.0,
7906; CM-NEXT:     LSHR * T21.Z, T22.Y, literal.y,
7907; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7908; CM-NEXT:     AND_INT T21.X, T22.Y, literal.x,
7909; CM-NEXT:     MOV T21.Y, 0.0,
7910; CM-NEXT:     LSHR * T33.Z, T22.X, literal.y,
7911; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7912; CM-NEXT:     AND_INT T33.X, T22.X, literal.x,
7913; CM-NEXT:     MOV T33.Y, 0.0,
7914; CM-NEXT:     LSHR * T34.Z, T22.W, literal.y,
7915; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7916; CM-NEXT:     AND_INT T34.X, T22.W, literal.x,
7917; CM-NEXT:     MOV T34.Y, 0.0,
7918; CM-NEXT:     LSHR * T35.Z, T22.Z, literal.y,
7919; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7920; CM-NEXT:     AND_INT T35.X, T22.Z, literal.x,
7921; CM-NEXT:     MOV T35.Y, 0.0,
7922; CM-NEXT:     MOV * T23.W, 0.0,
7923; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
7924; CM-NEXT:     MOV * T24.W, 0.0,
7925; CM-NEXT:     MOV * T25.W, 0.0,
7926; CM-NEXT:     MOV * T26.W, 0.0,
7927; CM-NEXT:     MOV * T20.W, 0.0,
7928; CM-NEXT:     MOV * T27.W, 0.0,
7929; CM-NEXT:     MOV * T28.W, 0.0,
7930; CM-NEXT:     MOV * T29.W, 0.0,
7931; CM-NEXT:     MOV * T19.W, 0.0,
7932; CM-NEXT:     MOV * T30.W, 0.0,
7933; CM-NEXT:     MOV * T31.W, 0.0,
7934; CM-NEXT:     MOV * T32.W, 0.0,
7935; CM-NEXT:     MOV * T21.W, 0.0,
7936; CM-NEXT:     MOV * T33.W, 0.0,
7937; CM-NEXT:     MOV * T34.W, 0.0,
7938; CM-NEXT:     MOV * T35.W, 0.0,
7939; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7940; CM-NEXT:    224(3.138909e-43), 0(0.000000e+00)
7941; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
7942; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7943; CM-NEXT:    2(2.802597e-45), 240(3.363116e-43)
7944; CM-NEXT:     LSHR T36.X, PV.W, literal.x,
7945; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7946; CM-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7947; CM-NEXT:     LSHR T37.X, PV.W, literal.x,
7948; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7949; CM-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7950; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
7951; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7952; CM-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7953; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
7954; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7955; CM-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7956; CM-NEXT:     LSHR T40.X, PV.W, literal.x,
7957; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7958; CM-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7959; CM-NEXT:     LSHR T41.X, PV.W, literal.x,
7960; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7961; CM-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7962; CM-NEXT:     LSHR T42.X, PV.W, literal.x,
7963; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7964; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7965; CM-NEXT:     LSHR T43.X, PV.W, literal.x,
7966; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7967; CM-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7968; CM-NEXT:     LSHR T44.X, PV.W, literal.x,
7969; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7970; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7971; CM-NEXT:     LSHR T45.X, PV.W, literal.x,
7972; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7973; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7974; CM-NEXT:     LSHR T46.X, PV.W, literal.x,
7975; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7976; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7977; CM-NEXT:     LSHR T47.X, PV.W, literal.x,
7978; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7979; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7980; CM-NEXT:     LSHR * T48.X, PV.W, literal.x,
7981; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7982; CM-NEXT:     LSHR T49.X, KC0[2].Y, literal.x,
7983; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7984; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7985; CM-NEXT:     LSHR * T50.X, PV.W, literal.x,
7986; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7987  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
7988  %ext = zext <32 x i16> %load to <32 x i64>
7989  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
7990  ret void
7991}
7992
7993define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
7994; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i64:
7995; GCN-NOHSA-SI:       ; %bb.0:
7996; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
7997; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
7998; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
7999; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
8000; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
8001; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
8002; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
8003; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
8004; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48
8005; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32
8006; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
8007; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
8008; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0
8009; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:16
8010; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
8011; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, v3
8012; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v16, 0, 16
8013; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[18:19], v[2:3], 48
8014; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8015; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240
8016; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8017; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[18:19], v[0:1], 48
8018; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v1, 0, 16
8019; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8020; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:208
8021; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(4)
8022; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v7
8023; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8024; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v1, 0, 16
8025; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[18:19], v[6:7], 48
8026; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8027; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
8028; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8029; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[18:19], v[4:5], 48
8030; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v5, 0, 16
8031; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8032; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:144
8033; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(4)
8034; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v15
8035; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8036; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v1, 0, 16
8037; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[18:19], v[14:15], 48
8038; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8039; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
8040; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8041; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[12:13], 48
8042; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v13, 0, 16
8043; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8044; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
8045; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v11
8046; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8047; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8048; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[10:11], 48
8049; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8050; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
8051; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8052; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[17:18], v[8:9], 48
8053; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v9, 0, 16
8054; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8055; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:16
8056; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
8057; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8058; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v1, 0, 16
8059; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v2, 0, 16
8060; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8061; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
8062; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:224
8063; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
8064; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v6
8065; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
8066; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v1, 0, 16
8067; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v12
8068; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8069; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8070; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
8071; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v10
8072; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8073; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v5, 0, 16
8074; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v6, 0, 16
8075; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8076; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8077; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
8078; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8079; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v8
8080; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v0, 0, 16
8081; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v8, 0, 16
8082; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v5, v10, 0, 16
8083; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v7, v7, 0, 16
8084; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v11, v9, 0, 16
8085; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v9, v12, 0, 16
8086; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v14
8087; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v14, 0, 16
8088; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v1, 0, 16
8089; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
8090; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v4, 0, 16
8091; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v1, 0, 16
8092; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8093; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
8094; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
8095; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8096; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
8097; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8098; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
8099; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8100; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
8101; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
8102; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128
8103; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96
8104; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
8105; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32
8106; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
8107; GCN-NOHSA-SI-NEXT:    s_endpgm
8108;
8109; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i64:
8110; GCN-HSA:       ; %bb.0:
8111; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
8112; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8113; GCN-HSA-NEXT:    s_add_u32 s4, s2, 48
8114; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
8115; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
8116; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
8117; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
8118; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
8119; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
8120; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
8121; GCN-HSA-NEXT:    s_add_u32 s4, s2, 32
8122; GCN-HSA-NEXT:    s_addc_u32 s5, s3, 0
8123; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
8124; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
8125; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
8126; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
8127; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
8128; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
8129; GCN-HSA-NEXT:    flat_load_dwordx4 v[8:11], v[8:9]
8130; GCN-HSA-NEXT:    flat_load_dwordx4 v[12:15], v[12:13]
8131; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
8132; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8133; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
8134; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
8135; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s5
8136; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s4
8137; GCN-HSA-NEXT:    s_waitcnt vmcnt(3)
8138; GCN-HSA-NEXT:    v_bfe_i32 v16, v1, 0, 16
8139; GCN-HSA-NEXT:    v_ashr_i64 v[18:19], v[0:1], 48
8140; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8141; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[16:19]
8142; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s3
8143; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
8144; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xf0
8145; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8146; GCN-HSA-NEXT:    s_add_u32 s4, s0, 0xd0
8147; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
8148; GCN-HSA-NEXT:    s_add_u32 s6, s0, 0xb0
8149; GCN-HSA-NEXT:    s_addc_u32 s7, s1, 0
8150; GCN-HSA-NEXT:    s_add_u32 s8, s0, 0x90
8151; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
8152; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0x70
8153; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
8154; GCN-HSA-NEXT:    s_add_u32 s12, s0, 0x50
8155; GCN-HSA-NEXT:    v_mov_b32_e32 v1, v3
8156; GCN-HSA-NEXT:    v_bfe_i32 v16, v1, 0, 16
8157; GCN-HSA-NEXT:    s_addc_u32 s13, s1, 0
8158; GCN-HSA-NEXT:    v_ashr_i64 v[18:19], v[2:3], 48
8159; GCN-HSA-NEXT:    s_add_u32 s14, s0, 32
8160; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8161; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
8162; GCN-HSA-NEXT:    flat_store_dwordx4 v[20:21], v[16:19]
8163; GCN-HSA-NEXT:    s_addc_u32 s15, s1, 0
8164; GCN-HSA-NEXT:    v_bfe_i32 v18, v1, 0, 16
8165; GCN-HSA-NEXT:    v_bfe_i32 v16, v2, 0, 16
8166; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s14
8167; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8168; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
8169; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
8170; GCN-HSA-NEXT:    flat_store_dwordx4 v[1:2], v[16:19]
8171; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
8172; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
8173; GCN-HSA-NEXT:    v_bfe_i32 v2, v1, 0, 16
8174; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
8175; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
8176; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8177; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8178; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
8179; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s5
8180; GCN-HSA-NEXT:    s_waitcnt vmcnt(6)
8181; GCN-HSA-NEXT:    v_bfe_i32 v0, v5, 0, 16
8182; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[4:5], 48
8183; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s4
8184; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8185; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
8186; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s3
8187; GCN-HSA-NEXT:    v_mov_b32_e32 v0, v7
8188; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
8189; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[6:7], 48
8190; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s2
8191; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8192; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
8193; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s9
8194; GCN-HSA-NEXT:    s_waitcnt vmcnt(7)
8195; GCN-HSA-NEXT:    v_bfe_i32 v0, v9, 0, 16
8196; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[8:9], 48
8197; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s8
8198; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8199; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
8200; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s7
8201; GCN-HSA-NEXT:    v_mov_b32_e32 v0, v11
8202; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
8203; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[10:11], 48
8204; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s6
8205; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8206; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
8207; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s13
8208; GCN-HSA-NEXT:    s_waitcnt vmcnt(8)
8209; GCN-HSA-NEXT:    v_bfe_i32 v0, v13, 0, 16
8210; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[12:13], 48
8211; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s12
8212; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8213; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
8214; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xe0
8215; GCN-HSA-NEXT:    v_mov_b32_e32 v0, v15
8216; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s11
8217; GCN-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 16
8218; GCN-HSA-NEXT:    v_ashr_i64 v[2:3], v[14:15], 48
8219; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s10
8220; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8221; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v5, 16, v6
8222; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8223; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
8224; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
8225; GCN-HSA-NEXT:    v_bfe_i32 v0, v6, 0, 16
8226; GCN-HSA-NEXT:    v_bfe_i32 v2, v5, 0, 16
8227; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s3
8228; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s2
8229; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xc0
8230; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8231; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8232; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8233; GCN-HSA-NEXT:    flat_store_dwordx4 v[5:6], v[0:3]
8234; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v15, 16, v10
8235; GCN-HSA-NEXT:    v_bfe_i32 v0, v4, 0, 16
8236; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
8237; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
8238; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0xa0
8239; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8240; GCN-HSA-NEXT:    v_bfe_i32 v2, v7, 0, 16
8241; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s3
8242; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8243; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8244; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8245; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v6, 16, v14
8246; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v2, 16, v12
8247; GCN-HSA-NEXT:    v_bfe_i32 v0, v12, 0, 16
8248; GCN-HSA-NEXT:    v_bfe_i32 v4, v14, 0, 16
8249; GCN-HSA-NEXT:    v_bfe_i32 v14, v15, 0, 16
8250; GCN-HSA-NEXT:    v_bfe_i32 v12, v10, 0, 16
8251; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s2
8252; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x80
8253; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
8254; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
8255; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8256; GCN-HSA-NEXT:    v_lshrrev_b32_e32 v11, 16, v8
8257; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
8258; GCN-HSA-NEXT:    v_bfe_i32 v10, v11, 0, 16
8259; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
8260; GCN-HSA-NEXT:    v_bfe_i32 v8, v8, 0, 16
8261; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
8262; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
8263; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
8264; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8265; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
8266; GCN-HSA-NEXT:    s_add_u32 s0, s0, 64
8267; GCN-HSA-NEXT:    v_bfe_i32 v6, v6, 0, 16
8268; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
8269; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
8270; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
8271; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
8272; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
8273; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
8274; GCN-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
8275; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
8276; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
8277; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
8278; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
8279; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
8280; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8281; GCN-HSA-NEXT:    s_endpgm
8282;
8283; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i64:
8284; GCN-NOHSA-VI:       ; %bb.0:
8285; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
8286; GCN-NOHSA-VI-NEXT:    s_mov_b32 s3, 0xf000
8287; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, -1
8288; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
8289; GCN-NOHSA-VI-NEXT:    s_mov_b32 s0, s4
8290; GCN-NOHSA-VI-NEXT:    s_mov_b32 s1, s5
8291; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s6
8292; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s7
8293; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s2
8294; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, s3
8295; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
8296; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
8297; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
8298; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
8299; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
8300; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v16, 16, v14
8301; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v16, 0, 16
8302; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v14, 0, 16
8303; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v14, v15
8304; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8305; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
8306; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
8307; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:224
8308; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v14, 0, 16
8309; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v15, 0, 16
8310; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
8311; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8312; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:240
8313; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v18, v5, 0, 16
8314; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v12
8315; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v14, 0, 16
8316; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v12, 0, 16
8317; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
8318; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8319; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192
8320; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v13, 0, 16
8321; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v13
8322; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v14, 0, 16
8323; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
8324; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
8325; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:208
8326; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v16, v11
8327; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v12, 16, v10
8328; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v11
8329; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v10, 0, 16
8330; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v16, 0, 16
8331; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v12, 0, 16
8332; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v17, 0, 16
8333; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
8334; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8335; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:160
8336; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8337; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8338; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:176
8339; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v10, v1, 0, 16
8340; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v8
8341; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v11, 0, 16
8342; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v8, 0, 16
8343; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8344; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8345; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v8, 16, v9
8346; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:128
8347; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
8348; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v9, 0, 16
8349; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v8, 0, 16
8350; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v8, v7
8351; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8352; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8353; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
8354; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:144
8355; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
8356; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v11, v6, 0, 16
8357; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v8, 0, 16
8358; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v7, 0, 16
8359; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v13, v9, 0, 16
8360; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
8361; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8362; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
8363; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
8364; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
8365; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v6, 0, 16
8366; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v4, 0, 16
8367; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
8368; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
8369; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8370; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:96
8371; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:64
8372; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
8373; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v6, v0, 0, 16
8374; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
8375; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v16, v0, 0, 16
8376; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v0, 16, v5
8377; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, v3
8378; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
8379; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v20, v0, 0, 16
8380; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v12, v1, 0, 16
8381; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v8, v11, 0, 16
8382; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v14, v2, 0, 16
8383; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v1, v4, 0, 16
8384; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v3, v3, 0, 16
8385; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
8386; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v21, 31, v20
8387; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
8388; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
8389; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
8390; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
8391; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
8392; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
8393; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
8394; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
8395; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:80
8396; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:32
8397; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[1:4], off, s[0:3], 0 offset:48
8398; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0
8399; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:16
8400; GCN-NOHSA-VI-NEXT:    s_endpgm
8401;
8402; EG-LABEL: global_sextload_v32i16_to_v32i64:
8403; EG:       ; %bb.0:
8404; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
8405; EG-NEXT:    TEX 0 @22
8406; EG-NEXT:    ALU 56, @31, KC0[CB0:0-32], KC1[]
8407; EG-NEXT:    TEX 2 @24
8408; EG-NEXT:    ALU 74, @88, KC0[CB0:0-32], KC1[]
8409; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
8410; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
8411; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T34.X, 0
8412; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T33.X, 0
8413; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
8414; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
8415; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T30.X, 0
8416; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T29.X, 0
8417; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
8418; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
8419; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T26.X, 0
8420; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0
8421; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
8422; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
8423; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
8424; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
8425; EG-NEXT:    CF_END
8426; EG-NEXT:    Fetch clause starting at 22:
8427; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
8428; EG-NEXT:    Fetch clause starting at 24:
8429; EG-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 48, #1
8430; EG-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 32, #1
8431; EG-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 16, #1
8432; EG-NEXT:    ALU clause starting at 30:
8433; EG-NEXT:     MOV * T19.X, KC0[2].Z,
8434; EG-NEXT:    ALU clause starting at 31:
8435; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8436; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8437; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
8438; EG-NEXT:     LSHR * T22.X, KC0[2].Y, literal.x,
8439; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8440; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8441; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
8442; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
8443; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8444; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8445; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
8446; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8447; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8448; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
8449; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8450; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
8451; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
8452; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8453; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
8454; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
8455; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8456; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
8457; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
8458; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8459; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
8460; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
8461; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8462; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
8463; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
8464; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8465; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
8466; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
8467; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8468; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
8469; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
8470; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8471; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
8472; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
8473; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8474; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
8475; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
8476; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
8477; EG-NEXT:     ASHR * T35.W, T20.Y, literal.z,
8478; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
8479; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8480; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
8481; EG-NEXT:     ASHR T35.Z, T20.Y, literal.y,
8482; EG-NEXT:     ASHR * T37.W, T20.X, literal.z,
8483; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8484; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8485; EG-NEXT:     BFE_INT T35.X, T20.Y, 0.0, literal.x,
8486; EG-NEXT:     ASHR * T37.Z, T20.X, literal.x,
8487; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8488; EG-NEXT:     BFE_INT T37.X, T20.X, 0.0, literal.x,
8489; EG-NEXT:     ASHR T35.Y, PV.X, literal.y,
8490; EG-NEXT:     ASHR * T19.W, T20.W, literal.y,
8491; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8492; EG-NEXT:    ALU clause starting at 88:
8493; EG-NEXT:     ASHR T19.Z, T20.W, literal.x,
8494; EG-NEXT:     ASHR * T41.W, T20.Z, literal.y,
8495; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8496; EG-NEXT:     BFE_INT T19.X, T20.W, 0.0, literal.x,
8497; EG-NEXT:     ASHR T37.Y, T37.X, literal.y,
8498; EG-NEXT:     ASHR T41.Z, T20.Z, literal.x,
8499; EG-NEXT:     ASHR * T20.W, T40.Y, literal.y,
8500; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8501; EG-NEXT:     BFE_INT T41.X, T20.Z, 0.0, literal.x,
8502; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
8503; EG-NEXT:     ASHR T20.Z, T40.Y, literal.x,
8504; EG-NEXT:     ASHR * T42.W, T40.X, literal.y,
8505; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8506; EG-NEXT:     BFE_INT T20.X, T40.Y, 0.0, literal.x,
8507; EG-NEXT:     ASHR T41.Y, PV.X, literal.y,
8508; EG-NEXT:     ASHR T42.Z, T40.X, literal.x,
8509; EG-NEXT:     ASHR * T43.W, T40.W, literal.y,
8510; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8511; EG-NEXT:     BFE_INT T42.X, T40.X, 0.0, literal.x,
8512; EG-NEXT:     ASHR T20.Y, PV.X, literal.y,
8513; EG-NEXT:     ASHR T43.Z, T40.W, literal.x,
8514; EG-NEXT:     ASHR * T44.W, T40.Z, literal.y,
8515; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8516; EG-NEXT:     BFE_INT T43.X, T40.W, 0.0, literal.x,
8517; EG-NEXT:     ASHR T42.Y, PV.X, literal.y,
8518; EG-NEXT:     ASHR T44.Z, T40.Z, literal.x,
8519; EG-NEXT:     ASHR * T40.W, T39.Y, literal.y,
8520; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8521; EG-NEXT:     BFE_INT T44.X, T40.Z, 0.0, literal.x,
8522; EG-NEXT:     ASHR T43.Y, PV.X, literal.y,
8523; EG-NEXT:     ASHR T40.Z, T39.Y, literal.x,
8524; EG-NEXT:     ASHR * T45.W, T39.X, literal.y,
8525; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8526; EG-NEXT:     BFE_INT T40.X, T39.Y, 0.0, literal.x,
8527; EG-NEXT:     ASHR T44.Y, PV.X, literal.y,
8528; EG-NEXT:     ASHR T45.Z, T39.X, literal.x,
8529; EG-NEXT:     ASHR * T46.W, T39.W, literal.y,
8530; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8531; EG-NEXT:     BFE_INT T45.X, T39.X, 0.0, literal.x,
8532; EG-NEXT:     ASHR T40.Y, PV.X, literal.y,
8533; EG-NEXT:     ASHR T46.Z, T39.W, literal.x,
8534; EG-NEXT:     ASHR * T47.W, T39.Z, literal.y,
8535; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8536; EG-NEXT:     BFE_INT T46.X, T39.W, 0.0, literal.x,
8537; EG-NEXT:     ASHR T45.Y, PV.X, literal.y,
8538; EG-NEXT:     ASHR T47.Z, T39.Z, literal.x,
8539; EG-NEXT:     ASHR * T39.W, T38.Y, literal.y,
8540; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8541; EG-NEXT:     BFE_INT T47.X, T39.Z, 0.0, literal.x,
8542; EG-NEXT:     ASHR T46.Y, PV.X, literal.y,
8543; EG-NEXT:     ASHR T39.Z, T38.Y, literal.x,
8544; EG-NEXT:     ASHR * T48.W, T38.X, literal.y,
8545; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8546; EG-NEXT:     BFE_INT T39.X, T38.Y, 0.0, literal.x,
8547; EG-NEXT:     ASHR T47.Y, PV.X, literal.y,
8548; EG-NEXT:     ASHR T48.Z, T38.X, literal.x,
8549; EG-NEXT:     ASHR * T49.W, T38.W, literal.y,
8550; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8551; EG-NEXT:     BFE_INT T48.X, T38.X, 0.0, literal.x,
8552; EG-NEXT:     ASHR T39.Y, PV.X, literal.y,
8553; EG-NEXT:     ASHR T49.Z, T38.W, literal.x,
8554; EG-NEXT:     ASHR * T50.W, T38.Z, literal.y,
8555; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8556; EG-NEXT:     BFE_INT T49.X, T38.W, 0.0, literal.x,
8557; EG-NEXT:     ASHR T48.Y, PV.X, literal.y,
8558; EG-NEXT:     ASHR * T50.Z, T38.Z, literal.x,
8559; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8560; EG-NEXT:     BFE_INT T50.X, T38.Z, 0.0, literal.x,
8561; EG-NEXT:     ASHR T49.Y, PV.X, literal.y,
8562; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
8563; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8564; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
8565; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
8566; EG-NEXT:     ASHR * T50.Y, PV.X, literal.y,
8567; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8568;
8569; CM-LABEL: global_sextload_v32i16_to_v32i64:
8570; CM:       ; %bb.0:
8571; CM-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
8572; CM-NEXT:    TEX 0 @22
8573; CM-NEXT:    ALU 55, @31, KC0[CB0:0-32], KC1[]
8574; CM-NEXT:    TEX 2 @24
8575; CM-NEXT:    ALU 73, @87, KC0[CB0:0-32], KC1[]
8576; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T38, T50.X
8577; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T49, T36.X
8578; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T48, T34.X
8579; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T47, T33.X
8580; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T39, T32.X
8581; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T46, T31.X
8582; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T30.X
8583; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T44, T29.X
8584; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T40, T28.X
8585; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T43, T27.X
8586; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T26.X
8587; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T41, T25.X
8588; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T20, T24.X
8589; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T23.X
8590; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T37, T22.X
8591; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T21.X
8592; CM-NEXT:    CF_END
8593; CM-NEXT:    Fetch clause starting at 22:
8594; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
8595; CM-NEXT:    Fetch clause starting at 24:
8596; CM-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 0, #1
8597; CM-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 16, #1
8598; CM-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 32, #1
8599; CM-NEXT:    ALU clause starting at 30:
8600; CM-NEXT:     MOV * T19.X, KC0[2].Z,
8601; CM-NEXT:    ALU clause starting at 31:
8602; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8603; CM-NEXT:    224(3.138909e-43), 0(0.000000e+00)
8604; CM-NEXT:     LSHR T21.X, PV.W, literal.x,
8605; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8606; CM-NEXT:    2(2.802597e-45), 240(3.363116e-43)
8607; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
8608; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8609; CM-NEXT:    2(2.802597e-45), 192(2.690493e-43)
8610; CM-NEXT:     LSHR T23.X, PV.W, literal.x,
8611; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8612; CM-NEXT:    2(2.802597e-45), 208(2.914701e-43)
8613; CM-NEXT:     LSHR T24.X, PV.W, literal.x,
8614; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8615; CM-NEXT:    2(2.802597e-45), 160(2.242078e-43)
8616; CM-NEXT:     LSHR T25.X, PV.W, literal.x,
8617; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8618; CM-NEXT:    2(2.802597e-45), 176(2.466285e-43)
8619; CM-NEXT:     LSHR T26.X, PV.W, literal.x,
8620; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8621; CM-NEXT:    2(2.802597e-45), 128(1.793662e-43)
8622; CM-NEXT:     LSHR T27.X, PV.W, literal.x,
8623; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8624; CM-NEXT:    2(2.802597e-45), 144(2.017870e-43)
8625; CM-NEXT:     LSHR T28.X, PV.W, literal.x,
8626; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8627; CM-NEXT:    2(2.802597e-45), 96(1.345247e-43)
8628; CM-NEXT:     LSHR T29.X, PV.W, literal.x,
8629; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8630; CM-NEXT:    2(2.802597e-45), 112(1.569454e-43)
8631; CM-NEXT:     LSHR T30.X, PV.W, literal.x,
8632; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8633; CM-NEXT:    2(2.802597e-45), 64(8.968310e-44)
8634; CM-NEXT:     LSHR T31.X, PV.W, literal.x,
8635; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8636; CM-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8637; CM-NEXT:     LSHR T32.X, PV.W, literal.x,
8638; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8639; CM-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8640; CM-NEXT:     LSHR T33.X, PV.W, literal.x,
8641; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8642; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
8643; CM-NEXT:     LSHR T34.X, PV.W, literal.x,
8644; CM-NEXT:     ASHR * T35.W, T20.Z, literal.y,
8645; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8646; CM-NEXT:     LSHR T36.X, KC0[2].Y, literal.x,
8647; CM-NEXT:     ASHR T35.Z, T20.Z, literal.y,
8648; CM-NEXT:     ASHR * T37.W, T20.W, literal.z,
8649; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8650; CM-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8651; CM-NEXT:     BFE_INT T35.X, T20.Z, 0.0, literal.x,
8652; CM-NEXT:     ASHR * T37.Z, T20.W, literal.x,
8653; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8654; CM-NEXT:     BFE_INT T37.X, T20.W, 0.0, literal.x,
8655; CM-NEXT:     ASHR T35.Y, PV.X, literal.y,
8656; CM-NEXT:     ASHR * T19.W, T20.X, literal.y,
8657; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8658; CM-NEXT:    ALU clause starting at 87:
8659; CM-NEXT:     ASHR T19.Z, T20.X, literal.x,
8660; CM-NEXT:     ASHR * T20.W, T20.Y, literal.y,
8661; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8662; CM-NEXT:     BFE_INT T19.X, T20.X, 0.0, literal.x,
8663; CM-NEXT:     ASHR T37.Y, T37.X, literal.y, BS:VEC_120/SCL_212
8664; CM-NEXT:     ASHR T20.Z, T20.Y, literal.x,
8665; CM-NEXT:     ASHR * T41.W, T40.Z, literal.y,
8666; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8667; CM-NEXT:     BFE_INT T20.X, T20.Y, 0.0, literal.x,
8668; CM-NEXT:     ASHR T19.Y, PV.X, literal.y,
8669; CM-NEXT:     ASHR T41.Z, T40.Z, literal.x,
8670; CM-NEXT:     ASHR * T42.W, T40.W, literal.y,
8671; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8672; CM-NEXT:     BFE_INT T41.X, T40.Z, 0.0, literal.x,
8673; CM-NEXT:     ASHR T20.Y, PV.X, literal.y,
8674; CM-NEXT:     ASHR T42.Z, T40.W, literal.x,
8675; CM-NEXT:     ASHR * T43.W, T40.X, literal.y,
8676; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8677; CM-NEXT:     BFE_INT T42.X, T40.W, 0.0, literal.x,
8678; CM-NEXT:     ASHR T41.Y, PV.X, literal.y,
8679; CM-NEXT:     ASHR T43.Z, T40.X, literal.x,
8680; CM-NEXT:     ASHR * T40.W, T40.Y, literal.y,
8681; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8682; CM-NEXT:     BFE_INT T43.X, T40.X, 0.0, literal.x,
8683; CM-NEXT:     ASHR T42.Y, PV.X, literal.y,
8684; CM-NEXT:     ASHR T40.Z, T40.Y, literal.x,
8685; CM-NEXT:     ASHR * T44.W, T39.Z, literal.y,
8686; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8687; CM-NEXT:     BFE_INT T40.X, T40.Y, 0.0, literal.x,
8688; CM-NEXT:     ASHR T43.Y, PV.X, literal.y,
8689; CM-NEXT:     ASHR T44.Z, T39.Z, literal.x,
8690; CM-NEXT:     ASHR * T45.W, T39.W, literal.y,
8691; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8692; CM-NEXT:     BFE_INT T44.X, T39.Z, 0.0, literal.x,
8693; CM-NEXT:     ASHR T40.Y, PV.X, literal.y,
8694; CM-NEXT:     ASHR T45.Z, T39.W, literal.x,
8695; CM-NEXT:     ASHR * T46.W, T39.X, literal.y,
8696; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8697; CM-NEXT:     BFE_INT T45.X, T39.W, 0.0, literal.x,
8698; CM-NEXT:     ASHR T44.Y, PV.X, literal.y,
8699; CM-NEXT:     ASHR T46.Z, T39.X, literal.x,
8700; CM-NEXT:     ASHR * T39.W, T39.Y, literal.y,
8701; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8702; CM-NEXT:     BFE_INT T46.X, T39.X, 0.0, literal.x,
8703; CM-NEXT:     ASHR T45.Y, PV.X, literal.y,
8704; CM-NEXT:     ASHR T39.Z, T39.Y, literal.x,
8705; CM-NEXT:     ASHR * T47.W, T38.Z, literal.y,
8706; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8707; CM-NEXT:     BFE_INT T39.X, T39.Y, 0.0, literal.x,
8708; CM-NEXT:     ASHR T46.Y, PV.X, literal.y,
8709; CM-NEXT:     ASHR T47.Z, T38.Z, literal.x,
8710; CM-NEXT:     ASHR * T48.W, T38.W, literal.y,
8711; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8712; CM-NEXT:     BFE_INT T47.X, T38.Z, 0.0, literal.x,
8713; CM-NEXT:     ASHR T39.Y, PV.X, literal.y,
8714; CM-NEXT:     ASHR T48.Z, T38.W, literal.x,
8715; CM-NEXT:     ASHR * T49.W, T38.X, literal.y,
8716; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8717; CM-NEXT:     BFE_INT T48.X, T38.W, 0.0, literal.x,
8718; CM-NEXT:     ASHR T47.Y, PV.X, literal.y,
8719; CM-NEXT:     ASHR T49.Z, T38.X, literal.x,
8720; CM-NEXT:     ASHR * T38.W, T38.Y, literal.y,
8721; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8722; CM-NEXT:     BFE_INT T49.X, T38.X, 0.0, literal.x,
8723; CM-NEXT:     ASHR T48.Y, PV.X, literal.y,
8724; CM-NEXT:     ASHR * T38.Z, T38.Y, literal.x,
8725; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8726; CM-NEXT:     BFE_INT T38.X, T38.Y, 0.0, literal.x,
8727; CM-NEXT:     ASHR T49.Y, PV.X, literal.y,
8728; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8729; CM-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8730; CM-NEXT:     LSHR T50.X, PV.W, literal.x,
8731; CM-NEXT:     ASHR * T38.Y, PV.X, literal.y,
8732; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8733  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
8734  %ext = sext <32 x i16> %load to <32 x i64>
8735  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
8736  ret void
8737}
8738
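; NOTE(review): the v64i16 -> v64i64 zext/sext load kernels that follow are
; disabled (commented out). Presumably this is related to the spilling FIXME at
; the top of this file -- confirm before re-enabling and regenerating checks.
8739; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
8740;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
8741;   %ext = zext <64 x i16> %load to <64 x i64>
8742;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
8743;   ret void
8744; }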
8745
8746; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
8747;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
8748;   %ext = sext <64 x i16> %load to <64 x i64>
8749;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
8750;   ret void
8751; }
8752
; Attribute group #0: any function definition carrying a trailing `#0` is marked nounwind.
8753attributes #0 = { nounwind }
8754