1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5
; Variable-index extractelement on a <4 x i16> loaded through an inreg
; addrspace(4) pointer, with the i32 index also inreg (amdgpu_ps).
; The checks stay on s_* instructions: s_cselect_b32 picks one of the two
; loaded dwords from a compare of (s4 >> 1) against 1, and the picked dword
; is shifted right by (s4 & 1) << 4.
6define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) {
7; GCN-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
10; GCN-NEXT:    s_lshr_b32 s2, s4, 1
11; GCN-NEXT:    s_cmp_eq_u32 s2, 1
12; GCN-NEXT:    s_waitcnt lgkmcnt(0)
13; GCN-NEXT:    s_cselect_b32 s0, s1, s0
14; GCN-NEXT:    s_and_b32 s1, s4, 1
15; GCN-NEXT:    s_lshl_b32 s1, s1, 4
16; GCN-NEXT:    s_lshr_b32 s0, s0, s1
17; GCN-NEXT:    ; return to shader part epilog
18  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
19  %element = extractelement <4 x i16> %vector, i32 %idx
20  ret i16 %element
21}
22
; Variable-index extractelement on a <4 x i16> loaded from an addrspace(1)
; pointer that is not inreg, with an inreg i32 index (amdgpu_ps).
; Each run line has its own check block. All three compute the compare and
; the shift amount with s_* instructions, select a dword with v_cndmask_b32,
; shift it with v_lshrrev_b32, and finish with v_readfirstlane_b32 into s0.
23define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 inreg %idx) {
24; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
25; GFX9:       ; %bb.0:
26; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
27; GFX9-NEXT:    s_lshr_b32 s0, s2, 1
28; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
29; GFX9-NEXT:    s_and_b32 s1, s2, 1
30; GFX9-NEXT:    s_lshl_b32 s0, s1, 4
31; GFX9-NEXT:    s_waitcnt vmcnt(0)
32; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
33; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
34; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
35; GFX9-NEXT:    ; return to shader part epilog
36;
37; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
38; GFX8:       ; %bb.0:
39; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
40; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
41; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
42; GFX8-NEXT:    s_and_b32 s1, s2, 1
43; GFX8-NEXT:    s_lshl_b32 s0, s1, 4
44; GFX8-NEXT:    s_waitcnt vmcnt(0)
45; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
46; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
47; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
48; GFX8-NEXT:    ; return to shader part epilog
49;
50; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
51; GFX7:       ; %bb.0:
52; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
53; GFX7-NEXT:    s_lshr_b32 s0, s2, 1
54; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
55; GFX7-NEXT:    s_and_b32 s1, s2, 1
56; GFX7-NEXT:    s_lshl_b32 s0, s1, 4
57; GFX7-NEXT:    s_waitcnt vmcnt(0)
58; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
59; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
60; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
61; GFX7-NEXT:    ; return to shader part epilog
62  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
63  %element = extractelement <4 x i16> %vector, i32 %idx
64  ret i16 %element
65}
66
; Variable-index extractelement on a <4 x i16> from addrspace(1) in a
; function with the default calling convention; neither argument is inreg.
; The checks do the compare, select and shift entirely with v_* instructions
; and return through s_setpc_b64 s[30:31].
67define i16 @extractelement_vgpr_v4i16_vgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 %idx) {
68; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
69; GFX9:       ; %bb.0:
70; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
72; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 1, v2
73; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
74; GFX9-NEXT:    v_and_b32_e32 v2, 1, v2
75; GFX9-NEXT:    s_waitcnt vmcnt(0)
76; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
77; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 4, v2
78; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
79; GFX9-NEXT:    s_setpc_b64 s[30:31]
80;
81; GFX8-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
82; GFX8:       ; %bb.0:
83; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
85; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 1, v2
86; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
87; GFX8-NEXT:    v_and_b32_e32 v2, 1, v2
88; GFX8-NEXT:    s_waitcnt vmcnt(0)
89; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
90; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 4, v2
91; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
92; GFX8-NEXT:    s_setpc_b64 s[30:31]
93;
94; GFX7-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
95; GFX7:       ; %bb.0:
96; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
98; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 1, v2
99; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
100; GFX7-NEXT:    v_and_b32_e32 v2, 1, v2
101; GFX7-NEXT:    s_waitcnt vmcnt(0)
102; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
103; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 4, v2
104; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
105; GFX7-NEXT:    s_setpc_b64 s[30:31]
106  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
107  %element = extractelement <4 x i16> %vector, i32 %idx
108  ret i16 %element
109}
110
; Variable-index extractelement where the <4 x i16> comes from an inreg
; addrspace(4) pointer but the i32 index is not inreg (amdgpu_ps).
; The checks copy the s_load_dwordx2 result (s0, s1) into v2/v3 with
; v_mov_b32 so v_cndmask_b32 can select on the index, then move the shifted
; result back to s0 with v_readfirstlane_b32.
111define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 %idx) {
112; GCN-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
113; GCN:       ; %bb.0:
114; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
115; GCN-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
116; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
117; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
118; GCN-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
119; GCN-NEXT:    s_waitcnt lgkmcnt(0)
120; GCN-NEXT:    v_mov_b32_e32 v2, s0
121; GCN-NEXT:    v_mov_b32_e32 v3, s1
122; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
123; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
124; GCN-NEXT:    v_readfirstlane_b32 s0, v0
125; GCN-NEXT:    ; return to shader part epilog
126  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
127  %element = extractelement <4 x i16> %vector, i32 %idx
128  ret i16 %element
129}
130
; Constant index 0 of a <4 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect only the s_load_dwordx2 and its
; s_waitcnt; no further instruction precedes the epilog.
131define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(<4 x i16> addrspace(4)* inreg %ptr) {
132; GCN-LABEL: extractelement_sgpr_v4i16_idx0:
133; GCN:       ; %bb.0:
134; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
135; GCN-NEXT:    s_waitcnt lgkmcnt(0)
136; GCN-NEXT:    ; return to shader part epilog
137  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
138  %element = extractelement <4 x i16> %vector, i32 0
139  ret i16 %element
140}
141
; Constant index 1 of a <4 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect the first loaded dword (s0) to be
; shifted right by 16 with s_lshr_b32.
142define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(<4 x i16> addrspace(4)* inreg %ptr) {
143; GCN-LABEL: extractelement_sgpr_v4i16_idx1:
144; GCN:       ; %bb.0:
145; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
146; GCN-NEXT:    s_waitcnt lgkmcnt(0)
147; GCN-NEXT:    s_lshr_b32 s0, s0, 16
148; GCN-NEXT:    ; return to shader part epilog
149  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
150  %element = extractelement <4 x i16> %vector, i32 1
151  ret i16 %element
152}
153
; Constant index 2 of a <4 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect a plain s_mov_b32 of the second
; loaded dword (s1) into s0, with no shift.
154define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(<4 x i16> addrspace(4)* inreg %ptr) {
155; GCN-LABEL: extractelement_sgpr_v4i16_idx2:
156; GCN:       ; %bb.0:
157; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
158; GCN-NEXT:    s_waitcnt lgkmcnt(0)
159; GCN-NEXT:    s_mov_b32 s0, s1
160; GCN-NEXT:    ; return to shader part epilog
161  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
162  %element = extractelement <4 x i16> %vector, i32 2
163  ret i16 %element
164}
165
; Constant index 3 of a <4 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect the second loaded dword (s1) to be
; shifted right by 16 into s0 with s_lshr_b32.
166define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(<4 x i16> addrspace(4)* inreg %ptr) {
167; GCN-LABEL: extractelement_sgpr_v4i16_idx3:
168; GCN:       ; %bb.0:
169; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
170; GCN-NEXT:    s_waitcnt lgkmcnt(0)
171; GCN-NEXT:    s_lshr_b32 s0, s1, 16
172; GCN-NEXT:    ; return to shader part epilog
173  %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr
174  %element = extractelement <4 x i16> %vector, i32 3
175  ret i16 %element
176}
177
; Constant index 0 of a <4 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. In every check block the
; load and its s_waitcnt are followed directly by s_setpc_b64.
178define i16 @extractelement_vgpr_v4i16_idx0(<4 x i16> addrspace(1)* %ptr) {
179; GFX9-LABEL: extractelement_vgpr_v4i16_idx0:
180; GFX9:       ; %bb.0:
181; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
183; GFX9-NEXT:    s_waitcnt vmcnt(0)
184; GFX9-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX8-LABEL: extractelement_vgpr_v4i16_idx0:
187; GFX8:       ; %bb.0:
188; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
190; GFX8-NEXT:    s_waitcnt vmcnt(0)
191; GFX8-NEXT:    s_setpc_b64 s[30:31]
192;
193; GFX7-LABEL: extractelement_vgpr_v4i16_idx0:
194; GFX7:       ; %bb.0:
195; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
197; GFX7-NEXT:    s_waitcnt vmcnt(0)
198; GFX7-NEXT:    s_setpc_b64 s[30:31]
199  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
200  %element = extractelement <4 x i16> %vector, i32 0
201  ret i16 %element
202}
203
; Constant index 1 of a <4 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects
; the first loaded dword (v0) shifted right by 16 with v_lshrrev_b32.
204define i16 @extractelement_vgpr_v4i16_idx1(<4 x i16> addrspace(1)* %ptr) {
205; GFX9-LABEL: extractelement_vgpr_v4i16_idx1:
206; GFX9:       ; %bb.0:
207; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
209; GFX9-NEXT:    s_waitcnt vmcnt(0)
210; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
211; GFX9-NEXT:    s_setpc_b64 s[30:31]
212;
213; GFX8-LABEL: extractelement_vgpr_v4i16_idx1:
214; GFX8:       ; %bb.0:
215; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
217; GFX8-NEXT:    s_waitcnt vmcnt(0)
218; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
219; GFX8-NEXT:    s_setpc_b64 s[30:31]
220;
221; GFX7-LABEL: extractelement_vgpr_v4i16_idx1:
222; GFX7:       ; %bb.0:
223; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
225; GFX7-NEXT:    s_waitcnt vmcnt(0)
226; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
227; GFX7-NEXT:    s_setpc_b64 s[30:31]
228  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
229  %element = extractelement <4 x i16> %vector, i32 1
230  ret i16 %element
231}
232
; Constant index 2 of a <4 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects a
; v_mov_b32 of the second loaded dword (v1) into v0, with no shift.
233define i16 @extractelement_vgpr_v4i16_idx2(<4 x i16> addrspace(1)* %ptr) {
234; GFX9-LABEL: extractelement_vgpr_v4i16_idx2:
235; GFX9:       ; %bb.0:
236; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
238; GFX9-NEXT:    s_waitcnt vmcnt(0)
239; GFX9-NEXT:    v_mov_b32_e32 v0, v1
240; GFX9-NEXT:    s_setpc_b64 s[30:31]
241;
242; GFX8-LABEL: extractelement_vgpr_v4i16_idx2:
243; GFX8:       ; %bb.0:
244; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
246; GFX8-NEXT:    s_waitcnt vmcnt(0)
247; GFX8-NEXT:    v_mov_b32_e32 v0, v1
248; GFX8-NEXT:    s_setpc_b64 s[30:31]
249;
250; GFX7-LABEL: extractelement_vgpr_v4i16_idx2:
251; GFX7:       ; %bb.0:
252; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
254; GFX7-NEXT:    s_waitcnt vmcnt(0)
255; GFX7-NEXT:    v_mov_b32_e32 v0, v1
256; GFX7-NEXT:    s_setpc_b64 s[30:31]
257  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
258  %element = extractelement <4 x i16> %vector, i32 2
259  ret i16 %element
260}
261
; Constant index 3 of a <4 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects
; the second loaded dword (v1) shifted right by 16 into v0.
262define i16 @extractelement_vgpr_v4i16_idx3(<4 x i16> addrspace(1)* %ptr) {
263; GFX9-LABEL: extractelement_vgpr_v4i16_idx3:
264; GFX9:       ; %bb.0:
265; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
267; GFX9-NEXT:    s_waitcnt vmcnt(0)
268; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
269; GFX9-NEXT:    s_setpc_b64 s[30:31]
270;
271; GFX8-LABEL: extractelement_vgpr_v4i16_idx3:
272; GFX8:       ; %bb.0:
273; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
275; GFX8-NEXT:    s_waitcnt vmcnt(0)
276; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
277; GFX8-NEXT:    s_setpc_b64 s[30:31]
278;
279; GFX7-LABEL: extractelement_vgpr_v4i16_idx3:
280; GFX7:       ; %bb.0:
281; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; GFX7-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
283; GFX7-NEXT:    s_waitcnt vmcnt(0)
284; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
285; GFX7-NEXT:    s_setpc_b64 s[30:31]
286  %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr
287  %element = extractelement <4 x i16> %vector, i32 3
288  ret i16 %element
289}
290
; Variable-index extractelement on an <8 x i16> loaded through an inreg
; addrspace(4) pointer, with the i32 index also inreg (amdgpu_ps).
; The checks pick one of the four loaded dwords with a chain of
; s_cmp_eq_u32 / s_cselect_b32 against (s4 >> 1) == 1, 2, 3, then shift the
; picked dword right by (s4 & 1) << 4.
291define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) {
292; GCN-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
293; GCN:       ; %bb.0:
294; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
295; GCN-NEXT:    s_lshr_b32 s5, s4, 1
296; GCN-NEXT:    s_cmp_eq_u32 s5, 1
297; GCN-NEXT:    s_waitcnt lgkmcnt(0)
298; GCN-NEXT:    s_cselect_b32 s0, s1, s0
299; GCN-NEXT:    s_cmp_eq_u32 s5, 2
300; GCN-NEXT:    s_cselect_b32 s0, s2, s0
301; GCN-NEXT:    s_cmp_eq_u32 s5, 3
302; GCN-NEXT:    s_cselect_b32 s0, s3, s0
303; GCN-NEXT:    s_and_b32 s1, s4, 1
304; GCN-NEXT:    s_lshl_b32 s1, s1, 4
305; GCN-NEXT:    s_lshr_b32 s0, s0, s1
306; GCN-NEXT:    ; return to shader part epilog
307  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
308  %element = extractelement <8 x i16> %vector, i32 %idx
309  ret i16 %element
310}
311
; Variable-index extractelement on an <8 x i16> loaded from an addrspace(1)
; pointer that is not inreg, with an inreg i32 index (amdgpu_ps).
; Each check block selects among the four loaded dwords with three
; v_cmp_eq_u32 / v_cndmask_b32 pairs, shifts with v_lshrrev_b32 and ends with
; v_readfirstlane_b32. The GFX7 block differs in the load: it builds s[4:7]
; with s_mov and uses buffer_load_dwordx4 ... addr64.
312define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 inreg %idx) {
313; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
314; GFX9:       ; %bb.0:
315; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
316; GFX9-NEXT:    s_lshr_b32 s0, s2, 1
317; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
318; GFX9-NEXT:    s_and_b32 s1, s2, 1
319; GFX9-NEXT:    s_waitcnt vmcnt(0)
320; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
321; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
322; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
323; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
324; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
325; GFX9-NEXT:    s_lshl_b32 s0, s1, 4
326; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
327; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
328; GFX9-NEXT:    ; return to shader part epilog
329;
330; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
331; GFX8:       ; %bb.0:
332; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
333; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
334; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
335; GFX8-NEXT:    s_and_b32 s1, s2, 1
336; GFX8-NEXT:    s_waitcnt vmcnt(0)
337; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
338; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
339; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
340; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
341; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
342; GFX8-NEXT:    s_lshl_b32 s0, s1, 4
343; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
344; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
345; GFX8-NEXT:    ; return to shader part epilog
346;
347; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
348; GFX7:       ; %bb.0:
349; GFX7-NEXT:    s_mov_b32 s6, 0
350; GFX7-NEXT:    s_mov_b32 s7, 0xf000
351; GFX7-NEXT:    s_mov_b64 s[4:5], 0
352; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
353; GFX7-NEXT:    s_lshr_b32 s0, s2, 1
354; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
355; GFX7-NEXT:    s_and_b32 s1, s2, 1
356; GFX7-NEXT:    s_waitcnt vmcnt(0)
357; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
358; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
359; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
360; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
361; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
362; GFX7-NEXT:    s_lshl_b32 s0, s1, 4
363; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
364; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
365; GFX7-NEXT:    ; return to shader part epilog
366  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
367  %element = extractelement <8 x i16> %vector, i32 %idx
368  ret i16 %element
369}
370
; Variable-index extractelement on an <8 x i16> from addrspace(1) in a
; function with the default calling convention; neither argument is inreg.
; The checks load into v[3:6], select among those four dwords with three
; v_cmp_eq_u32 / v_cndmask_b32 pairs, and shift by the amount held in v1.
; The GFX7 block loads with buffer_load_dwordx4 ... addr64 through s[4:7].
371define i16 @extractelement_vgpr_v8i16_vgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 %idx) {
372; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
373; GFX9:       ; %bb.0:
374; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375; GFX9-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
376; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v2
377; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
378; GFX9-NEXT:    v_and_b32_e32 v1, 1, v2
379; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
380; GFX9-NEXT:    s_waitcnt vmcnt(0)
381; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
382; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
383; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
384; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
385; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
386; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
387; GFX9-NEXT:    s_setpc_b64 s[30:31]
388;
389; GFX8-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
390; GFX8:       ; %bb.0:
391; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392; GFX8-NEXT:    flat_load_dwordx4 v[3:6], v[0:1]
393; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v2
394; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
395; GFX8-NEXT:    v_and_b32_e32 v1, 1, v2
396; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
397; GFX8-NEXT:    s_waitcnt vmcnt(0)
398; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
399; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
400; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
401; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
402; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
403; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
404; GFX8-NEXT:    s_setpc_b64 s[30:31]
405;
406; GFX7-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
407; GFX7:       ; %bb.0:
408; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409; GFX7-NEXT:    s_mov_b32 s6, 0
410; GFX7-NEXT:    s_mov_b32 s7, 0xf000
411; GFX7-NEXT:    s_mov_b64 s[4:5], 0
412; GFX7-NEXT:    buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64
413; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 1, v2
414; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
415; GFX7-NEXT:    v_and_b32_e32 v1, 1, v2
416; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
417; GFX7-NEXT:    s_waitcnt vmcnt(0)
418; GFX7-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
419; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
420; GFX7-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
421; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
422; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
423; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
424; GFX7-NEXT:    s_setpc_b64 s[30:31]
425  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
426  %element = extractelement <8 x i16> %vector, i32 %idx
427  ret i16 %element
428}
429
; Variable-index extractelement where the <8 x i16> comes from an inreg
; addrspace(4) pointer but the i32 index is not inreg (amdgpu_ps).
; The checks copy each s_load_dwordx4 result register (s0..s3) into a v
; register with v_mov_b32, select among them with v_cndmask_b32, and move the
; shifted result back to s0 with v_readfirstlane_b32.
430define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 %idx) {
431; GCN-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
432; GCN:       ; %bb.0:
433; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
434; GCN-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
435; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
436; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
437; GCN-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
438; GCN-NEXT:    s_waitcnt lgkmcnt(0)
439; GCN-NEXT:    v_mov_b32_e32 v2, s0
440; GCN-NEXT:    v_mov_b32_e32 v3, s1
441; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
442; GCN-NEXT:    v_mov_b32_e32 v4, s2
443; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
444; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
445; GCN-NEXT:    v_mov_b32_e32 v5, s3
446; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
447; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
448; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
449; GCN-NEXT:    v_readfirstlane_b32 s0, v0
450; GCN-NEXT:    ; return to shader part epilog
451  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
452  %element = extractelement <8 x i16> %vector, i32 %idx
453  ret i16 %element
454}
455
; Constant index 0 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect only the s_load_dwordx4 and its
; s_waitcnt; no further instruction precedes the epilog.
456define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(<8 x i16> addrspace(4)* inreg %ptr) {
457; GCN-LABEL: extractelement_sgpr_v8i16_idx0:
458; GCN:       ; %bb.0:
459; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
460; GCN-NEXT:    s_waitcnt lgkmcnt(0)
461; GCN-NEXT:    ; return to shader part epilog
462  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
463  %element = extractelement <8 x i16> %vector, i32 0
464  ret i16 %element
465}
466
; Constant index 1 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect the first loaded dword (s0) to be
; shifted right by 16 with s_lshr_b32.
467define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(<8 x i16> addrspace(4)* inreg %ptr) {
468; GCN-LABEL: extractelement_sgpr_v8i16_idx1:
469; GCN:       ; %bb.0:
470; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
471; GCN-NEXT:    s_waitcnt lgkmcnt(0)
472; GCN-NEXT:    s_lshr_b32 s0, s0, 16
473; GCN-NEXT:    ; return to shader part epilog
474  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
475  %element = extractelement <8 x i16> %vector, i32 1
476  ret i16 %element
477}
478
; Constant index 2 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect a plain s_mov_b32 of the second
; loaded dword (s1) into s0, with no shift.
479define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(<8 x i16> addrspace(4)* inreg %ptr) {
480; GCN-LABEL: extractelement_sgpr_v8i16_idx2:
481; GCN:       ; %bb.0:
482; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
483; GCN-NEXT:    s_waitcnt lgkmcnt(0)
484; GCN-NEXT:    s_mov_b32 s0, s1
485; GCN-NEXT:    ; return to shader part epilog
486  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
487  %element = extractelement <8 x i16> %vector, i32 2
488  ret i16 %element
489}
490
; Constant index 3 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect the second loaded dword (s1) to be
; shifted right by 16 into s0 with s_lshr_b32.
491define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(<8 x i16> addrspace(4)* inreg %ptr) {
492; GCN-LABEL: extractelement_sgpr_v8i16_idx3:
493; GCN:       ; %bb.0:
494; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
495; GCN-NEXT:    s_waitcnt lgkmcnt(0)
496; GCN-NEXT:    s_lshr_b32 s0, s1, 16
497; GCN-NEXT:    ; return to shader part epilog
498  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
499  %element = extractelement <8 x i16> %vector, i32 3
500  ret i16 %element
501}
502
; Constant index 4 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect a plain s_mov_b32 of the third
; loaded dword (s2) into s0, with no shift.
503define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(<8 x i16> addrspace(4)* inreg %ptr) {
504; GCN-LABEL: extractelement_sgpr_v8i16_idx4:
505; GCN:       ; %bb.0:
506; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
507; GCN-NEXT:    s_waitcnt lgkmcnt(0)
508; GCN-NEXT:    s_mov_b32 s0, s2
509; GCN-NEXT:    ; return to shader part epilog
510  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
511  %element = extractelement <8 x i16> %vector, i32 4
512  ret i16 %element
513}
514
; Constant index 5 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect the third loaded dword (s2) to be
; shifted right by 16 into s0 with s_lshr_b32.
515define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(<8 x i16> addrspace(4)* inreg %ptr) {
516; GCN-LABEL: extractelement_sgpr_v8i16_idx5:
517; GCN:       ; %bb.0:
518; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
519; GCN-NEXT:    s_waitcnt lgkmcnt(0)
520; GCN-NEXT:    s_lshr_b32 s0, s2, 16
521; GCN-NEXT:    ; return to shader part epilog
522  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
523  %element = extractelement <8 x i16> %vector, i32 5
524  ret i16 %element
525}
526
; Constant index 6 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect a plain s_mov_b32 of the fourth
; loaded dword (s3) into s0, with no shift.
527define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(<8 x i16> addrspace(4)* inreg %ptr) {
528; GCN-LABEL: extractelement_sgpr_v8i16_idx6:
529; GCN:       ; %bb.0:
530; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
531; GCN-NEXT:    s_waitcnt lgkmcnt(0)
532; GCN-NEXT:    s_mov_b32 s0, s3
533; GCN-NEXT:    ; return to shader part epilog
534  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
535  %element = extractelement <8 x i16> %vector, i32 6
536  ret i16 %element
537}
538
; Constant index 7 of an <8 x i16> loaded through an inreg addrspace(4)
; pointer (amdgpu_ps). The checks expect the fourth loaded dword (s3) to be
; shifted right by 16 into s0 with s_lshr_b32.
539define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(<8 x i16> addrspace(4)* inreg %ptr) {
540; GCN-LABEL: extractelement_sgpr_v8i16_idx7:
541; GCN:       ; %bb.0:
542; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
543; GCN-NEXT:    s_waitcnt lgkmcnt(0)
544; GCN-NEXT:    s_lshr_b32 s0, s3, 16
545; GCN-NEXT:    ; return to shader part epilog
546  %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr
547  %element = extractelement <8 x i16> %vector, i32 7
548  ret i16 %element
549}
550
; Constant index 0 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. In every check block the
; load and its s_waitcnt are followed directly by s_setpc_b64. The GFX7
; block loads with buffer_load_dwordx4 ... addr64 through s[4:7].
551define i16 @extractelement_vgpr_v8i16_idx0(<8 x i16> addrspace(1)* %ptr) {
552; GFX9-LABEL: extractelement_vgpr_v8i16_idx0:
553; GFX9:       ; %bb.0:
554; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
556; GFX9-NEXT:    s_waitcnt vmcnt(0)
557; GFX9-NEXT:    s_setpc_b64 s[30:31]
558;
559; GFX8-LABEL: extractelement_vgpr_v8i16_idx0:
560; GFX8:       ; %bb.0:
561; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
563; GFX8-NEXT:    s_waitcnt vmcnt(0)
564; GFX8-NEXT:    s_setpc_b64 s[30:31]
565;
566; GFX7-LABEL: extractelement_vgpr_v8i16_idx0:
567; GFX7:       ; %bb.0:
568; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569; GFX7-NEXT:    s_mov_b32 s6, 0
570; GFX7-NEXT:    s_mov_b32 s7, 0xf000
571; GFX7-NEXT:    s_mov_b64 s[4:5], 0
572; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
573; GFX7-NEXT:    s_waitcnt vmcnt(0)
574; GFX7-NEXT:    s_setpc_b64 s[30:31]
575  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
576  %element = extractelement <8 x i16> %vector, i32 0
577  ret i16 %element
578}
579
; Constant index 1 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects
; the first loaded dword (v0) shifted right by 16 with v_lshrrev_b32. The
; GFX7 block loads with buffer_load_dwordx4 ... addr64 through s[4:7].
580define i16 @extractelement_vgpr_v8i16_idx1(<8 x i16> addrspace(1)* %ptr) {
581; GFX9-LABEL: extractelement_vgpr_v8i16_idx1:
582; GFX9:       ; %bb.0:
583; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
585; GFX9-NEXT:    s_waitcnt vmcnt(0)
586; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
587; GFX9-NEXT:    s_setpc_b64 s[30:31]
588;
589; GFX8-LABEL: extractelement_vgpr_v8i16_idx1:
590; GFX8:       ; %bb.0:
591; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
593; GFX8-NEXT:    s_waitcnt vmcnt(0)
594; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
595; GFX8-NEXT:    s_setpc_b64 s[30:31]
596;
597; GFX7-LABEL: extractelement_vgpr_v8i16_idx1:
598; GFX7:       ; %bb.0:
599; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600; GFX7-NEXT:    s_mov_b32 s6, 0
601; GFX7-NEXT:    s_mov_b32 s7, 0xf000
602; GFX7-NEXT:    s_mov_b64 s[4:5], 0
603; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
604; GFX7-NEXT:    s_waitcnt vmcnt(0)
605; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
606; GFX7-NEXT:    s_setpc_b64 s[30:31]
607  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
608  %element = extractelement <8 x i16> %vector, i32 1
609  ret i16 %element
610}
611
; Constant index 2 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects a
; v_mov_b32 of the second loaded dword (v1) into v0, with no shift. The
; GFX7 block loads with buffer_load_dwordx4 ... addr64 through s[4:7].
612define i16 @extractelement_vgpr_v8i16_idx2(<8 x i16> addrspace(1)* %ptr) {
613; GFX9-LABEL: extractelement_vgpr_v8i16_idx2:
614; GFX9:       ; %bb.0:
615; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
617; GFX9-NEXT:    s_waitcnt vmcnt(0)
618; GFX9-NEXT:    v_mov_b32_e32 v0, v1
619; GFX9-NEXT:    s_setpc_b64 s[30:31]
620;
621; GFX8-LABEL: extractelement_vgpr_v8i16_idx2:
622; GFX8:       ; %bb.0:
623; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
625; GFX8-NEXT:    s_waitcnt vmcnt(0)
626; GFX8-NEXT:    v_mov_b32_e32 v0, v1
627; GFX8-NEXT:    s_setpc_b64 s[30:31]
628;
629; GFX7-LABEL: extractelement_vgpr_v8i16_idx2:
630; GFX7:       ; %bb.0:
631; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
632; GFX7-NEXT:    s_mov_b32 s6, 0
633; GFX7-NEXT:    s_mov_b32 s7, 0xf000
634; GFX7-NEXT:    s_mov_b64 s[4:5], 0
635; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
636; GFX7-NEXT:    s_waitcnt vmcnt(0)
637; GFX7-NEXT:    v_mov_b32_e32 v0, v1
638; GFX7-NEXT:    s_setpc_b64 s[30:31]
639  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
640  %element = extractelement <8 x i16> %vector, i32 2
641  ret i16 %element
642}
643
; Constant index 3 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects
; the second loaded dword (v1) shifted right by 16 into v0. The GFX7 block
; loads with buffer_load_dwordx4 ... addr64 through s[4:7].
644define i16 @extractelement_vgpr_v8i16_idx3(<8 x i16> addrspace(1)* %ptr) {
645; GFX9-LABEL: extractelement_vgpr_v8i16_idx3:
646; GFX9:       ; %bb.0:
647; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
649; GFX9-NEXT:    s_waitcnt vmcnt(0)
650; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
651; GFX9-NEXT:    s_setpc_b64 s[30:31]
652;
653; GFX8-LABEL: extractelement_vgpr_v8i16_idx3:
654; GFX8:       ; %bb.0:
655; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
656; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
657; GFX8-NEXT:    s_waitcnt vmcnt(0)
658; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
659; GFX8-NEXT:    s_setpc_b64 s[30:31]
660;
661; GFX7-LABEL: extractelement_vgpr_v8i16_idx3:
662; GFX7:       ; %bb.0:
663; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664; GFX7-NEXT:    s_mov_b32 s6, 0
665; GFX7-NEXT:    s_mov_b32 s7, 0xf000
666; GFX7-NEXT:    s_mov_b64 s[4:5], 0
667; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
668; GFX7-NEXT:    s_waitcnt vmcnt(0)
669; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
670; GFX7-NEXT:    s_setpc_b64 s[30:31]
671  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
672  %element = extractelement <8 x i16> %vector, i32 3
673  ret i16 %element
674}
675
; Constant index 4 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects a
; v_mov_b32 of the third loaded dword (v2) into v0, with no shift. The
; GFX7 block loads with buffer_load_dwordx4 ... addr64 through s[4:7].
676define i16 @extractelement_vgpr_v8i16_idx4(<8 x i16> addrspace(1)* %ptr) {
677; GFX9-LABEL: extractelement_vgpr_v8i16_idx4:
678; GFX9:       ; %bb.0:
679; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
681; GFX9-NEXT:    s_waitcnt vmcnt(0)
682; GFX9-NEXT:    v_mov_b32_e32 v0, v2
683; GFX9-NEXT:    s_setpc_b64 s[30:31]
684;
685; GFX8-LABEL: extractelement_vgpr_v8i16_idx4:
686; GFX8:       ; %bb.0:
687; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
689; GFX8-NEXT:    s_waitcnt vmcnt(0)
690; GFX8-NEXT:    v_mov_b32_e32 v0, v2
691; GFX8-NEXT:    s_setpc_b64 s[30:31]
692;
693; GFX7-LABEL: extractelement_vgpr_v8i16_idx4:
694; GFX7:       ; %bb.0:
695; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696; GFX7-NEXT:    s_mov_b32 s6, 0
697; GFX7-NEXT:    s_mov_b32 s7, 0xf000
698; GFX7-NEXT:    s_mov_b64 s[4:5], 0
699; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
700; GFX7-NEXT:    s_waitcnt vmcnt(0)
701; GFX7-NEXT:    v_mov_b32_e32 v0, v2
702; GFX7-NEXT:    s_setpc_b64 s[30:31]
703  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
704  %element = extractelement <8 x i16> %vector, i32 4
705  ret i16 %element
706}
707
; Constant index 5 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects
; the third loaded dword (v2) shifted right by 16 into v0. The GFX7 block
; loads with buffer_load_dwordx4 ... addr64 through s[4:7].
708define i16 @extractelement_vgpr_v8i16_idx5(<8 x i16> addrspace(1)* %ptr) {
709; GFX9-LABEL: extractelement_vgpr_v8i16_idx5:
710; GFX9:       ; %bb.0:
711; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
712; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
713; GFX9-NEXT:    s_waitcnt vmcnt(0)
714; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
715; GFX9-NEXT:    s_setpc_b64 s[30:31]
716;
717; GFX8-LABEL: extractelement_vgpr_v8i16_idx5:
718; GFX8:       ; %bb.0:
719; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
721; GFX8-NEXT:    s_waitcnt vmcnt(0)
722; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
723; GFX8-NEXT:    s_setpc_b64 s[30:31]
724;
725; GFX7-LABEL: extractelement_vgpr_v8i16_idx5:
726; GFX7:       ; %bb.0:
727; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728; GFX7-NEXT:    s_mov_b32 s6, 0
729; GFX7-NEXT:    s_mov_b32 s7, 0xf000
730; GFX7-NEXT:    s_mov_b64 s[4:5], 0
731; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
732; GFX7-NEXT:    s_waitcnt vmcnt(0)
733; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
734; GFX7-NEXT:    s_setpc_b64 s[30:31]
735  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
736  %element = extractelement <8 x i16> %vector, i32 5
737  ret i16 %element
738}
739
; Constant index 6 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects a
; v_mov_b32 of the fourth loaded dword (v3) into v0, with no shift. The
; GFX7 block loads with buffer_load_dwordx4 ... addr64 through s[4:7].
740define i16 @extractelement_vgpr_v8i16_idx6(<8 x i16> addrspace(1)* %ptr) {
741; GFX9-LABEL: extractelement_vgpr_v8i16_idx6:
742; GFX9:       ; %bb.0:
743; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
745; GFX9-NEXT:    s_waitcnt vmcnt(0)
746; GFX9-NEXT:    v_mov_b32_e32 v0, v3
747; GFX9-NEXT:    s_setpc_b64 s[30:31]
748;
749; GFX8-LABEL: extractelement_vgpr_v8i16_idx6:
750; GFX8:       ; %bb.0:
751; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
753; GFX8-NEXT:    s_waitcnt vmcnt(0)
754; GFX8-NEXT:    v_mov_b32_e32 v0, v3
755; GFX8-NEXT:    s_setpc_b64 s[30:31]
756;
757; GFX7-LABEL: extractelement_vgpr_v8i16_idx6:
758; GFX7:       ; %bb.0:
759; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
760; GFX7-NEXT:    s_mov_b32 s6, 0
761; GFX7-NEXT:    s_mov_b32 s7, 0xf000
762; GFX7-NEXT:    s_mov_b64 s[4:5], 0
763; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
764; GFX7-NEXT:    s_waitcnt vmcnt(0)
765; GFX7-NEXT:    v_mov_b32_e32 v0, v3
766; GFX7-NEXT:    s_setpc_b64 s[30:31]
767  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
768  %element = extractelement <8 x i16> %vector, i32 6
769  ret i16 %element
770}
771
; Constant index 7 of an <8 x i16> loaded from an addrspace(1) pointer in a
; function with the default calling convention. Every check block expects
; the fourth loaded dword (v3) shifted right by 16 into v0. The GFX7 block
; loads with buffer_load_dwordx4 ... addr64 through s[4:7].
772define i16 @extractelement_vgpr_v8i16_idx7(<8 x i16> addrspace(1)* %ptr) {
773; GFX9-LABEL: extractelement_vgpr_v8i16_idx7:
774; GFX9:       ; %bb.0:
775; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
776; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
777; GFX9-NEXT:    s_waitcnt vmcnt(0)
778; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v3
779; GFX9-NEXT:    s_setpc_b64 s[30:31]
780;
781; GFX8-LABEL: extractelement_vgpr_v8i16_idx7:
782; GFX8:       ; %bb.0:
783; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
785; GFX8-NEXT:    s_waitcnt vmcnt(0)
786; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v3
787; GFX8-NEXT:    s_setpc_b64 s[30:31]
788;
789; GFX7-LABEL: extractelement_vgpr_v8i16_idx7:
790; GFX7:       ; %bb.0:
791; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792; GFX7-NEXT:    s_mov_b32 s6, 0
793; GFX7-NEXT:    s_mov_b32 s7, 0xf000
794; GFX7-NEXT:    s_mov_b64 s[4:5], 0
795; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
796; GFX7-NEXT:    s_waitcnt vmcnt(0)
797; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v3
798; GFX7-NEXT:    s_setpc_b64 s[30:31]
799  %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
800  %element = extractelement <8 x i16> %vector, i32 7
801  ret i16 %element
802}
803