1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5
; Dynamic extractelement from a <4 x i8> loaded through an inreg addrspace(4)
; pointer, using an inreg i32 index. The three llc configurations share one set
; of checks (the common GCN prefix). Expected output: a scalar dword load, the
; four bytes re-assembled into one dword with scalar shift/and/or ops, the index
; masked with 3 and shifted left by 3 (byte index -> bit offset), then a scalar
; right shift by that amount.
6define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
7; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
10; GCN-NEXT:    s_movk_i32 s5, 0xff
11; GCN-NEXT:    s_waitcnt lgkmcnt(0)
12; GCN-NEXT:    s_lshr_b32 s1, s0, 8
13; GCN-NEXT:    s_and_b32 s1, s1, s5
14; GCN-NEXT:    s_lshr_b32 s2, s0, 16
15; GCN-NEXT:    s_lshr_b32 s3, s0, 24
16; GCN-NEXT:    s_and_b32 s0, s0, s5
17; GCN-NEXT:    s_lshl_b32 s1, s1, 8
18; GCN-NEXT:    s_or_b32 s0, s0, s1
19; GCN-NEXT:    s_and_b32 s1, s2, s5
20; GCN-NEXT:    s_lshl_b32 s1, s1, 16
21; GCN-NEXT:    s_or_b32 s0, s0, s1
22; GCN-NEXT:    s_lshl_b32 s1, s3, 24
23; GCN-NEXT:    s_or_b32 s0, s0, s1
24; GCN-NEXT:    s_and_b32 s1, s4, 3
25; GCN-NEXT:    s_lshl_b32 s1, s1, 3
26; GCN-NEXT:    s_lshr_b32 s0, s0, s1
27; GCN-NEXT:    ; return to shader part epilog
28  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
29  %element = extractelement <4 x i8> %vector, i32 %idx
30  ret i8 %element
31}
32
; Dynamic extractelement from a <4 x i8> loaded through a non-inreg
; addrspace(1) pointer, using an inreg i32 index. Checks are per target because
; the load differs (global_load on gfx900, flat_load on fiji, addr64
; buffer_load on hawaii) and so does the byte re-packing sequence. In every
; variant the index is masked with 3 and scaled by 8 using scalar ops, the
; shift itself is a vector shift, and the result is copied back to s0 with
; v_readfirstlane_b32.
33define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
34; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
35; GFX9:       ; %bb.0:
36; GFX9-NEXT:    global_load_dword v0, v[0:1], off
37; GFX9-NEXT:    s_mov_b32 s0, 8
38; GFX9-NEXT:    s_movk_i32 s1, 0xff
39; GFX9-NEXT:    s_and_b32 s2, s2, 3
40; GFX9-NEXT:    s_waitcnt vmcnt(0)
41; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
42; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
43; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
44; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
45; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
46; GFX9-NEXT:    v_and_or_b32 v0, v0, s1, v1
47; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v2
48; GFX9-NEXT:    s_lshl_b32 s0, s2, 3
49; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
50; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
51; GFX9-NEXT:    ; return to shader part epilog
52;
53; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
54; GFX8:       ; %bb.0:
55; GFX8-NEXT:    flat_load_dword v0, v[0:1]
56; GFX8-NEXT:    s_movk_i32 s0, 0xff
57; GFX8-NEXT:    v_mov_b32_e32 v1, 8
58; GFX8-NEXT:    v_mov_b32_e32 v2, s0
59; GFX8-NEXT:    s_and_b32 s0, s2, 3
60; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
61; GFX8-NEXT:    s_waitcnt vmcnt(0)
62; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
63; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
64; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
65; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
66; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
67; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
68; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
69; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
70; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
71; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
72; GFX8-NEXT:    ; return to shader part epilog
73;
74; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
75; GFX7:       ; %bb.0:
76; GFX7-NEXT:    s_mov_b32 s6, 0
77; GFX7-NEXT:    s_mov_b32 s7, 0xf000
78; GFX7-NEXT:    s_mov_b64 s[4:5], 0
79; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
80; GFX7-NEXT:    s_movk_i32 s0, 0xff
81; GFX7-NEXT:    s_and_b32 s1, s2, 3
82; GFX7-NEXT:    s_waitcnt vmcnt(0)
83; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
84; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
85; GFX7-NEXT:    v_and_b32_e32 v1, s0, v1
86; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
87; GFX7-NEXT:    v_and_b32_e32 v2, s0, v2
88; GFX7-NEXT:    v_and_b32_e32 v0, s0, v0
89; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
90; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
91; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
92; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
93; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
94; GFX7-NEXT:    v_or_b32_e32 v0, v0, v3
95; GFX7-NEXT:    s_lshl_b32 s0, s1, 3
96; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
97; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
98; GFX7-NEXT:    ; return to shader part epilog
99  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
100  %element = extractelement <4 x i8> %vector, i32 %idx
101  ret i8 %element
102}
103
; Dynamic extractelement from a <4 x i8> loaded through an addrspace(1)
; pointer, with neither argument marked inreg and no amdgpu_ps calling
; convention (the expected code returns via s_setpc_b64 s[30:31]). Checks are
; per target. In every variant the index is masked with 3 and shifted left by 3
; using vector ops, and the re-packed dword is shifted right by that amount
; with a vector shift.
104define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) {
105; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
106; GFX9:       ; %bb.0:
107; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX9-NEXT:    global_load_dword v0, v[0:1], off
109; GFX9-NEXT:    v_and_b32_e32 v1, 3, v2
110; GFX9-NEXT:    s_mov_b32 s4, 8
111; GFX9-NEXT:    s_movk_i32 s5, 0xff
112; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
113; GFX9-NEXT:    s_waitcnt vmcnt(0)
114; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
115; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
116; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
117; GFX9-NEXT:    v_and_b32_sdwa v4, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
118; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
119; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v2
120; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v3
121; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
122; GFX9-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
125; GFX8:       ; %bb.0:
126; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX8-NEXT:    flat_load_dword v0, v[0:1]
128; GFX8-NEXT:    s_movk_i32 s4, 0xff
129; GFX8-NEXT:    v_mov_b32_e32 v1, 8
130; GFX8-NEXT:    v_mov_b32_e32 v3, s4
131; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
132; GFX8-NEXT:    s_waitcnt vmcnt(0)
133; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
134; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
135; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
136; GFX8-NEXT:    v_and_b32_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
137; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
138; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
139; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
140; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
141; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
142; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
143; GFX8-NEXT:    s_setpc_b64 s[30:31]
144;
145; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
146; GFX7:       ; %bb.0:
147; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148; GFX7-NEXT:    s_mov_b32 s6, 0
149; GFX7-NEXT:    s_mov_b32 s7, 0xf000
150; GFX7-NEXT:    s_mov_b64 s[4:5], 0
151; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
152; GFX7-NEXT:    v_and_b32_e32 v1, 3, v2
153; GFX7-NEXT:    s_movk_i32 s4, 0xff
154; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
155; GFX7-NEXT:    s_waitcnt vmcnt(0)
156; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
157; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
158; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
159; GFX7-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
160; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
161; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
162; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
163; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
164; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
165; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
166; GFX7-NEXT:    v_or_b32_e32 v0, v0, v3
167; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
168; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
169; GFX7-NEXT:    s_setpc_b64 s[30:31]
170  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
171  %element = extractelement <4 x i8> %vector, i32 %idx
172  ret i8 %element
173}
174
; Dynamic extractelement from a <4 x i8> loaded through an inreg addrspace(4)
; pointer, using a non-inreg i32 index. The load and byte re-packing are
; expected on scalar instructions, while the index masking/scaling and the
; final shift are vector instructions. The per-target checks are identical
; except for the final shift: gfx900 and fiji expect v_lshrrev_b32_e64 with the
; scalar value as the second source, hawaii expects v_lshr_b32_e32 with the
; scalar value as the first source. The result is moved to s0 with
; v_readfirstlane_b32.
175define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
176; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
177; GFX9:       ; %bb.0:
178; GFX9-NEXT:    s_load_dword s0, s[2:3], 0x0
179; GFX9-NEXT:    s_movk_i32 s4, 0xff
180; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
181; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
182; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
183; GFX9-NEXT:    s_lshr_b32 s1, s0, 8
184; GFX9-NEXT:    s_and_b32 s1, s1, s4
185; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
186; GFX9-NEXT:    s_lshr_b32 s3, s0, 24
187; GFX9-NEXT:    s_and_b32 s0, s0, s4
188; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
189; GFX9-NEXT:    s_or_b32 s0, s0, s1
190; GFX9-NEXT:    s_and_b32 s1, s2, s4
191; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
192; GFX9-NEXT:    s_or_b32 s0, s0, s1
193; GFX9-NEXT:    s_lshl_b32 s1, s3, 24
194; GFX9-NEXT:    s_or_b32 s0, s0, s1
195; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
196; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
197; GFX9-NEXT:    ; return to shader part epilog
198;
199; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
200; GFX8:       ; %bb.0:
201; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x0
202; GFX8-NEXT:    s_movk_i32 s4, 0xff
203; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
204; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
205; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
206; GFX8-NEXT:    s_lshr_b32 s1, s0, 8
207; GFX8-NEXT:    s_and_b32 s1, s1, s4
208; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
209; GFX8-NEXT:    s_lshr_b32 s3, s0, 24
210; GFX8-NEXT:    s_and_b32 s0, s0, s4
211; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
212; GFX8-NEXT:    s_or_b32 s0, s0, s1
213; GFX8-NEXT:    s_and_b32 s1, s2, s4
214; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
215; GFX8-NEXT:    s_or_b32 s0, s0, s1
216; GFX8-NEXT:    s_lshl_b32 s1, s3, 24
217; GFX8-NEXT:    s_or_b32 s0, s0, s1
218; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
219; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
220; GFX8-NEXT:    ; return to shader part epilog
221;
222; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
223; GFX7:       ; %bb.0:
224; GFX7-NEXT:    s_load_dword s0, s[2:3], 0x0
225; GFX7-NEXT:    s_movk_i32 s4, 0xff
226; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
227; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
228; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
229; GFX7-NEXT:    s_lshr_b32 s1, s0, 8
230; GFX7-NEXT:    s_and_b32 s1, s1, s4
231; GFX7-NEXT:    s_lshr_b32 s2, s0, 16
232; GFX7-NEXT:    s_lshr_b32 s3, s0, 24
233; GFX7-NEXT:    s_and_b32 s0, s0, s4
234; GFX7-NEXT:    s_lshl_b32 s1, s1, 8
235; GFX7-NEXT:    s_or_b32 s0, s0, s1
236; GFX7-NEXT:    s_and_b32 s1, s2, s4
237; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
238; GFX7-NEXT:    s_or_b32 s0, s0, s1
239; GFX7-NEXT:    s_lshl_b32 s1, s3, 24
240; GFX7-NEXT:    s_or_b32 s0, s0, s1
241; GFX7-NEXT:    v_lshr_b32_e32 v0, s0, v0
242; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
243; GFX7-NEXT:    ; return to shader part epilog
244  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
245  %element = extractelement <4 x i8> %vector, i32 %idx
246  ret i8 %element
247}
248
; Constant-index extractelement (index 0) from a <4 x i8> loaded through an
; inreg addrspace(4) pointer. All three llc configurations share the checks.
; The expected code still re-assembles all four bytes into s0 with scalar ops;
; for index 0 no trailing shift follows the re-packing.
249define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) {
250; GCN-LABEL: extractelement_sgpr_v4i8_idx0:
251; GCN:       ; %bb.0:
252; GCN-NEXT:    s_load_dword s1, s[2:3], 0x0
253; GCN-NEXT:    s_movk_i32 s0, 0xff
254; GCN-NEXT:    s_waitcnt lgkmcnt(0)
255; GCN-NEXT:    s_lshr_b32 s2, s1, 8
256; GCN-NEXT:    s_and_b32 s2, s2, s0
257; GCN-NEXT:    s_lshr_b32 s3, s1, 16
258; GCN-NEXT:    s_lshr_b32 s4, s1, 24
259; GCN-NEXT:    s_and_b32 s1, s1, s0
260; GCN-NEXT:    s_and_b32 s0, s3, s0
261; GCN-NEXT:    s_lshl_b32 s2, s2, 8
262; GCN-NEXT:    s_or_b32 s1, s1, s2
263; GCN-NEXT:    s_lshl_b32 s0, s0, 16
264; GCN-NEXT:    s_or_b32 s0, s1, s0
265; GCN-NEXT:    s_lshl_b32 s1, s4, 24
266; GCN-NEXT:    s_or_b32 s0, s0, s1
267; GCN-NEXT:    ; return to shader part epilog
268  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
269  %element = extractelement <4 x i8> %vector, i32 0
270  ret i8 %element
271}
272
; Constant-index extractelement (index 1) from a <4 x i8> loaded through an
; inreg addrspace(4) pointer. All three llc configurations share the checks.
; After the scalar byte re-packing, the expected code shifts the dword right by
; 8 bits with s_lshr_b32 to bring element 1 to the low byte.
273define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) {
274; GCN-LABEL: extractelement_sgpr_v4i8_idx1:
275; GCN:       ; %bb.0:
276; GCN-NEXT:    s_load_dword s1, s[2:3], 0x0
277; GCN-NEXT:    s_movk_i32 s0, 0xff
278; GCN-NEXT:    s_waitcnt lgkmcnt(0)
279; GCN-NEXT:    s_lshr_b32 s2, s1, 8
280; GCN-NEXT:    s_and_b32 s2, s2, s0
281; GCN-NEXT:    s_lshr_b32 s3, s1, 16
282; GCN-NEXT:    s_lshr_b32 s4, s1, 24
283; GCN-NEXT:    s_and_b32 s1, s1, s0
284; GCN-NEXT:    s_and_b32 s0, s3, s0
285; GCN-NEXT:    s_lshl_b32 s2, s2, 8
286; GCN-NEXT:    s_or_b32 s1, s1, s2
287; GCN-NEXT:    s_lshl_b32 s0, s0, 16
288; GCN-NEXT:    s_or_b32 s0, s1, s0
289; GCN-NEXT:    s_lshl_b32 s1, s4, 24
290; GCN-NEXT:    s_or_b32 s0, s0, s1
291; GCN-NEXT:    s_lshr_b32 s0, s0, 8
292; GCN-NEXT:    ; return to shader part epilog
293  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
294  %element = extractelement <4 x i8> %vector, i32 1
295  ret i8 %element
296}
297
; Constant-index extractelement (index 2) from a <4 x i8> loaded through an
; inreg addrspace(4) pointer. All three llc configurations share the checks.
; After the scalar byte re-packing, the expected code shifts the dword right by
; 16 bits with s_lshr_b32 to bring element 2 to the low byte.
298define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) {
299; GCN-LABEL: extractelement_sgpr_v4i8_idx2:
300; GCN:       ; %bb.0:
301; GCN-NEXT:    s_load_dword s1, s[2:3], 0x0
302; GCN-NEXT:    s_movk_i32 s0, 0xff
303; GCN-NEXT:    s_waitcnt lgkmcnt(0)
304; GCN-NEXT:    s_lshr_b32 s2, s1, 8
305; GCN-NEXT:    s_and_b32 s2, s2, s0
306; GCN-NEXT:    s_lshr_b32 s3, s1, 16
307; GCN-NEXT:    s_lshr_b32 s4, s1, 24
308; GCN-NEXT:    s_and_b32 s1, s1, s0
309; GCN-NEXT:    s_and_b32 s0, s3, s0
310; GCN-NEXT:    s_lshl_b32 s2, s2, 8
311; GCN-NEXT:    s_or_b32 s1, s1, s2
312; GCN-NEXT:    s_lshl_b32 s0, s0, 16
313; GCN-NEXT:    s_or_b32 s0, s1, s0
314; GCN-NEXT:    s_lshl_b32 s1, s4, 24
315; GCN-NEXT:    s_or_b32 s0, s0, s1
316; GCN-NEXT:    s_lshr_b32 s0, s0, 16
317; GCN-NEXT:    ; return to shader part epilog
318  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
319  %element = extractelement <4 x i8> %vector, i32 2
320  ret i8 %element
321}
322
; Constant-index extractelement (index 3) from a <4 x i8> loaded through an
; inreg addrspace(4) pointer. All three llc configurations share the checks.
; After the scalar byte re-packing, the expected code shifts the dword right by
; 24 bits with s_lshr_b32 to bring element 3 to the low byte.
323define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) {
324; GCN-LABEL: extractelement_sgpr_v4i8_idx3:
325; GCN:       ; %bb.0:
326; GCN-NEXT:    s_load_dword s1, s[2:3], 0x0
327; GCN-NEXT:    s_movk_i32 s0, 0xff
328; GCN-NEXT:    s_waitcnt lgkmcnt(0)
329; GCN-NEXT:    s_lshr_b32 s2, s1, 8
330; GCN-NEXT:    s_and_b32 s2, s2, s0
331; GCN-NEXT:    s_lshr_b32 s3, s1, 16
332; GCN-NEXT:    s_lshr_b32 s4, s1, 24
333; GCN-NEXT:    s_and_b32 s1, s1, s0
334; GCN-NEXT:    s_and_b32 s0, s3, s0
335; GCN-NEXT:    s_lshl_b32 s2, s2, 8
336; GCN-NEXT:    s_or_b32 s1, s1, s2
337; GCN-NEXT:    s_lshl_b32 s0, s0, 16
338; GCN-NEXT:    s_or_b32 s0, s1, s0
339; GCN-NEXT:    s_lshl_b32 s1, s4, 24
340; GCN-NEXT:    s_or_b32 s0, s0, s1
341; GCN-NEXT:    s_lshr_b32 s0, s0, 24
342; GCN-NEXT:    ; return to shader part epilog
343  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
344  %element = extractelement <4 x i8> %vector, i32 3
345  ret i8 %element
346}
347
; Constant-index extractelement (index 0) from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer, default calling convention (expected code
; returns via s_setpc_b64 s[30:31]). Checks are per target because the load
; instruction and the byte re-packing sequence differ. For index 0 the expected
; code ends right after the re-packing, with no trailing shift.
348define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) {
349; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
350; GFX9:       ; %bb.0:
351; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352; GFX9-NEXT:    global_load_dword v0, v[0:1], off
353; GFX9-NEXT:    s_mov_b32 s4, 8
354; GFX9-NEXT:    s_movk_i32 s5, 0xff
355; GFX9-NEXT:    s_waitcnt vmcnt(0)
356; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
357; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
358; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
359; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
360; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
361; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
362; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
363; GFX9-NEXT:    s_setpc_b64 s[30:31]
364;
365; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
366; GFX8:       ; %bb.0:
367; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368; GFX8-NEXT:    flat_load_dword v0, v[0:1]
369; GFX8-NEXT:    s_movk_i32 s4, 0xff
370; GFX8-NEXT:    v_mov_b32_e32 v1, 8
371; GFX8-NEXT:    v_mov_b32_e32 v2, s4
372; GFX8-NEXT:    s_waitcnt vmcnt(0)
373; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
374; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
375; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
376; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
377; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
378; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
379; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
380; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
381; GFX8-NEXT:    s_setpc_b64 s[30:31]
382;
383; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
384; GFX7:       ; %bb.0:
385; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386; GFX7-NEXT:    s_mov_b32 s6, 0
387; GFX7-NEXT:    s_mov_b32 s7, 0xf000
388; GFX7-NEXT:    s_mov_b64 s[4:5], 0
389; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
390; GFX7-NEXT:    s_movk_i32 s4, 0xff
391; GFX7-NEXT:    s_waitcnt vmcnt(0)
392; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
393; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
394; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
395; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
396; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
397; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
398; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
399; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
400; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
401; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
402; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
403; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
404; GFX7-NEXT:    s_setpc_b64 s[30:31]
405  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
406  %element = extractelement <4 x i8> %vector, i32 0
407  ret i8 %element
408}
409
; Constant-index extractelement (index 1) from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer, default calling convention. Checks are per
; target. After the byte re-packing, each variant expects a single
; v_lshrrev_b32 by 8 bits before returning via s_setpc_b64 s[30:31].
410define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) {
411; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
412; GFX9:       ; %bb.0:
413; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; GFX9-NEXT:    global_load_dword v0, v[0:1], off
415; GFX9-NEXT:    s_mov_b32 s4, 8
416; GFX9-NEXT:    s_movk_i32 s5, 0xff
417; GFX9-NEXT:    s_waitcnt vmcnt(0)
418; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
419; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
420; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
421; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
422; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
423; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
424; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
425; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
426; GFX9-NEXT:    s_setpc_b64 s[30:31]
427;
428; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
429; GFX8:       ; %bb.0:
430; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX8-NEXT:    flat_load_dword v0, v[0:1]
432; GFX8-NEXT:    s_movk_i32 s4, 0xff
433; GFX8-NEXT:    v_mov_b32_e32 v1, 8
434; GFX8-NEXT:    v_mov_b32_e32 v2, s4
435; GFX8-NEXT:    s_waitcnt vmcnt(0)
436; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
437; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
438; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
439; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
440; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
441; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
442; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
443; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
444; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
445; GFX8-NEXT:    s_setpc_b64 s[30:31]
446;
447; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
448; GFX7:       ; %bb.0:
449; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450; GFX7-NEXT:    s_mov_b32 s6, 0
451; GFX7-NEXT:    s_mov_b32 s7, 0xf000
452; GFX7-NEXT:    s_mov_b64 s[4:5], 0
453; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
454; GFX7-NEXT:    s_movk_i32 s4, 0xff
455; GFX7-NEXT:    s_waitcnt vmcnt(0)
456; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
457; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
458; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
459; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
460; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
461; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
462; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
463; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
464; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
465; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
466; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
467; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
468; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
469; GFX7-NEXT:    s_setpc_b64 s[30:31]
470  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
471  %element = extractelement <4 x i8> %vector, i32 1
472  ret i8 %element
473}
474
; Constant-index extractelement (index 2) from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer, default calling convention. Checks are per
; target. After the byte re-packing, each variant expects a single
; v_lshrrev_b32 by 16 bits before returning via s_setpc_b64 s[30:31].
475define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) {
476; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
477; GFX9:       ; %bb.0:
478; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
479; GFX9-NEXT:    global_load_dword v0, v[0:1], off
480; GFX9-NEXT:    s_mov_b32 s4, 8
481; GFX9-NEXT:    s_movk_i32 s5, 0xff
482; GFX9-NEXT:    s_waitcnt vmcnt(0)
483; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
484; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
485; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
486; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
487; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
488; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
489; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
490; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
491; GFX9-NEXT:    s_setpc_b64 s[30:31]
492;
493; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
494; GFX8:       ; %bb.0:
495; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496; GFX8-NEXT:    flat_load_dword v0, v[0:1]
497; GFX8-NEXT:    s_movk_i32 s4, 0xff
498; GFX8-NEXT:    v_mov_b32_e32 v1, 8
499; GFX8-NEXT:    v_mov_b32_e32 v2, s4
500; GFX8-NEXT:    s_waitcnt vmcnt(0)
501; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
502; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
503; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
504; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
505; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
506; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
507; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
508; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
509; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
510; GFX8-NEXT:    s_setpc_b64 s[30:31]
511;
512; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
513; GFX7:       ; %bb.0:
514; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515; GFX7-NEXT:    s_mov_b32 s6, 0
516; GFX7-NEXT:    s_mov_b32 s7, 0xf000
517; GFX7-NEXT:    s_mov_b64 s[4:5], 0
518; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
519; GFX7-NEXT:    s_movk_i32 s4, 0xff
520; GFX7-NEXT:    s_waitcnt vmcnt(0)
521; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
522; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
523; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
524; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
525; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
526; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
527; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
528; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
529; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
530; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
531; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
532; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
533; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
534; GFX7-NEXT:    s_setpc_b64 s[30:31]
535  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
536  %element = extractelement <4 x i8> %vector, i32 2
537  ret i8 %element
538}
539
; Constant-index extractelement (index 3) from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer, default calling convention. Checks are per
; target. After the byte re-packing, each variant expects a single
; v_lshrrev_b32 by 24 bits before returning via s_setpc_b64 s[30:31].
540define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) {
541; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
542; GFX9:       ; %bb.0:
543; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544; GFX9-NEXT:    global_load_dword v0, v[0:1], off
545; GFX9-NEXT:    s_mov_b32 s4, 8
546; GFX9-NEXT:    s_movk_i32 s5, 0xff
547; GFX9-NEXT:    s_waitcnt vmcnt(0)
548; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
549; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
550; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
551; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
552; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
553; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
554; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
555; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
556; GFX9-NEXT:    s_setpc_b64 s[30:31]
557;
558; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
559; GFX8:       ; %bb.0:
560; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561; GFX8-NEXT:    flat_load_dword v0, v[0:1]
562; GFX8-NEXT:    s_movk_i32 s4, 0xff
563; GFX8-NEXT:    v_mov_b32_e32 v1, 8
564; GFX8-NEXT:    v_mov_b32_e32 v2, s4
565; GFX8-NEXT:    s_waitcnt vmcnt(0)
566; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
567; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
568; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
569; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
570; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
571; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
572; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
573; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
574; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
575; GFX8-NEXT:    s_setpc_b64 s[30:31]
576;
577; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
578; GFX7:       ; %bb.0:
579; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580; GFX7-NEXT:    s_mov_b32 s6, 0
581; GFX7-NEXT:    s_mov_b32 s7, 0xf000
582; GFX7-NEXT:    s_mov_b64 s[4:5], 0
583; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
584; GFX7-NEXT:    s_movk_i32 s4, 0xff
585; GFX7-NEXT:    s_waitcnt vmcnt(0)
586; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
587; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
588; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
589; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
590; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
591; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
592; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
593; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
594; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
595; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
596; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
597; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
598; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
599; GFX7-NEXT:    s_setpc_b64 s[30:31]
600  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
601  %element = extractelement <4 x i8> %vector, i32 3
602  ret i8 %element
603}
604
; Dynamic extractelement from an <8 x i8> loaded through an inreg addrspace(4)
; pointer, using an inreg i32 index. All three llc configurations share the
; checks. Expected output: a scalar two-dword load, each dword re-packed byte
; by byte with scalar ops, then the dword is chosen by comparing (index >> 2)
; against 1 (s_cmp_eq_u32 + s_cselect_b32), and the byte within it is selected
; by shifting right by (index & 3) << 3 bits.
605define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
606; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
607; GCN:       ; %bb.0:
608; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
609; GCN-NEXT:    s_movk_i32 s9, 0xff
610; GCN-NEXT:    s_waitcnt lgkmcnt(0)
611; GCN-NEXT:    s_lshr_b32 s2, s0, 8
612; GCN-NEXT:    s_and_b32 s2, s2, s9
613; GCN-NEXT:    s_lshr_b32 s3, s0, 16
614; GCN-NEXT:    s_lshr_b32 s5, s0, 24
615; GCN-NEXT:    s_and_b32 s0, s0, s9
616; GCN-NEXT:    s_lshl_b32 s2, s2, 8
617; GCN-NEXT:    s_or_b32 s0, s0, s2
618; GCN-NEXT:    s_and_b32 s2, s3, s9
619; GCN-NEXT:    s_lshl_b32 s2, s2, 16
620; GCN-NEXT:    s_or_b32 s0, s0, s2
621; GCN-NEXT:    s_lshl_b32 s2, s5, 24
622; GCN-NEXT:    s_lshr_b32 s6, s1, 8
623; GCN-NEXT:    s_or_b32 s0, s0, s2
624; GCN-NEXT:    s_and_b32 s2, s6, s9
625; GCN-NEXT:    s_lshr_b32 s7, s1, 16
626; GCN-NEXT:    s_lshr_b32 s8, s1, 24
627; GCN-NEXT:    s_and_b32 s1, s1, s9
628; GCN-NEXT:    s_lshl_b32 s2, s2, 8
629; GCN-NEXT:    s_or_b32 s1, s1, s2
630; GCN-NEXT:    s_and_b32 s2, s7, s9
631; GCN-NEXT:    s_lshl_b32 s2, s2, 16
632; GCN-NEXT:    s_or_b32 s1, s1, s2
633; GCN-NEXT:    s_lshl_b32 s2, s8, 24
634; GCN-NEXT:    s_or_b32 s1, s1, s2
635; GCN-NEXT:    s_lshr_b32 s2, s4, 2
636; GCN-NEXT:    s_cmp_eq_u32 s2, 1
637; GCN-NEXT:    s_cselect_b32 s0, s1, s0
638; GCN-NEXT:    s_and_b32 s1, s4, 3
639; GCN-NEXT:    s_lshl_b32 s1, s1, 3
640; GCN-NEXT:    s_lshr_b32 s0, s0, s1
641; GCN-NEXT:    ; return to shader part epilog
642  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
643  %element = extractelement <8 x i8> %vector, i32 %idx
644  ret i8 %element
645}
646
647define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
648; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
649; GFX9:       ; %bb.0:
650; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
651; GFX9-NEXT:    s_mov_b32 s0, 8
652; GFX9-NEXT:    s_movk_i32 s1, 0xff
653; GFX9-NEXT:    s_lshr_b32 s3, s2, 2
654; GFX9-NEXT:    s_and_b32 s2, s2, 3
655; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 1
656; GFX9-NEXT:    s_waitcnt vmcnt(0)
657; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
658; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
659; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
660; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
661; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
662; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
663; GFX9-NEXT:    v_and_b32_sdwa v6, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
664; GFX9-NEXT:    v_and_b32_sdwa v7, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
665; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
666; GFX9-NEXT:    v_and_or_b32 v0, v0, s1, v2
667; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
668; GFX9-NEXT:    v_and_or_b32 v1, v1, s1, v4
669; GFX9-NEXT:    v_or3_b32 v0, v0, v6, v3
670; GFX9-NEXT:    v_or3_b32 v1, v1, v7, v5
671; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
672; GFX9-NEXT:    s_lshl_b32 s0, s2, 3
673; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
674; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
675; GFX9-NEXT:    ; return to shader part epilog
676;
677; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
678; GFX8:       ; %bb.0:
679; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
680; GFX8-NEXT:    s_movk_i32 s0, 0xff
681; GFX8-NEXT:    v_mov_b32_e32 v2, 8
682; GFX8-NEXT:    v_mov_b32_e32 v3, 8
683; GFX8-NEXT:    v_mov_b32_e32 v4, s0
684; GFX8-NEXT:    s_lshr_b32 s0, s2, 2
685; GFX8-NEXT:    s_and_b32 s1, s2, 3
686; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
687; GFX8-NEXT:    s_lshl_b32 s0, s1, 3
688; GFX8-NEXT:    s_waitcnt vmcnt(0)
689; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v0
690; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 8, v1
691; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
692; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
693; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 24, v0
694; GFX8-NEXT:    v_and_b32_sdwa v9, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
695; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
696; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 24, v1
697; GFX8-NEXT:    v_and_b32_sdwa v4, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
698; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
699; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 24, v6
700; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
701; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 24, v8
702; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
703; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
704; GFX8-NEXT:    v_or_b32_e32 v1, v1, v6
705; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
706; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
707; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
708; GFX8-NEXT:    ; return to shader part epilog
709;
710; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
711; GFX7:       ; %bb.0:
712; GFX7-NEXT:    s_mov_b32 s6, 0
713; GFX7-NEXT:    s_mov_b32 s7, 0xf000
714; GFX7-NEXT:    s_mov_b64 s[4:5], 0
715; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
716; GFX7-NEXT:    s_movk_i32 s0, 0xff
717; GFX7-NEXT:    s_lshr_b32 s1, s2, 2
718; GFX7-NEXT:    s_and_b32 s2, s2, 3
719; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 1
720; GFX7-NEXT:    s_waitcnt vmcnt(0)
721; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
722; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
723; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
724; GFX7-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
725; GFX7-NEXT:    v_and_b32_e32 v2, s0, v2
726; GFX7-NEXT:    v_and_b32_e32 v5, s0, v5
727; GFX7-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
728; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
729; GFX7-NEXT:    v_and_b32_e32 v3, s0, v3
730; GFX7-NEXT:    v_and_b32_e32 v6, s0, v6
731; GFX7-NEXT:    v_and_b32_e32 v0, s0, v0
732; GFX7-NEXT:    v_and_b32_e32 v1, s0, v1
733; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
734; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
735; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
736; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
737; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
738; GFX7-NEXT:    v_or_b32_e32 v1, v1, v5
739; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
740; GFX7-NEXT:    v_or_b32_e32 v0, v0, v3
741; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
742; GFX7-NEXT:    v_or_b32_e32 v1, v1, v6
743; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
744; GFX7-NEXT:    v_or_b32_e32 v1, v1, v7
745; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
746; GFX7-NEXT:    s_lshl_b32 s0, s2, 3
747; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
748; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
749; GFX7-NEXT:    ; return to shader part epilog
750  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
751  %element = extractelement <8 x i8> %vector, i32 %idx
752  ret i8 %element
753}
754
; Dynamic-index extractelement on a <8 x i8> loaded from addrspace(1), where
; neither %ptr nor %idx is marked inreg and the function uses no amdgpu_ps
; calling convention (the expected code returns with s_setpc_b64).
; For all three targets the expected code repacks each loaded dword from its
; bytes, uses idx >> 2 compared against 1 to pick one dword via v_cndmask,
; and then shifts right by (idx & 3) << 3.
755define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) {
756; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
757; GFX9:       ; %bb.0:
758; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
760; GFX9-NEXT:    s_mov_b32 s4, 8
761; GFX9-NEXT:    s_movk_i32 s5, 0xff
762; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 2, v2
763; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
764; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
765; GFX9-NEXT:    s_waitcnt vmcnt(0)
766; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
767; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 8, v1
768; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
769; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
770; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
771; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, s4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
772; GFX9-NEXT:    v_and_b32_sdwa v8, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
773; GFX9-NEXT:    v_and_b32_sdwa v9, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
774; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
775; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v4
776; GFX9-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
777; GFX9-NEXT:    v_and_or_b32 v1, v1, s5, v6
778; GFX9-NEXT:    v_or3_b32 v0, v0, v8, v5
779; GFX9-NEXT:    v_or3_b32 v1, v1, v9, v7
780; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
781; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
782; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
783; GFX9-NEXT:    s_setpc_b64 s[30:31]
784;
785; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
786; GFX8:       ; %bb.0:
787; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
788; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
789; GFX8-NEXT:    s_movk_i32 s4, 0xff
790; GFX8-NEXT:    v_mov_b32_e32 v3, 8
791; GFX8-NEXT:    v_mov_b32_e32 v4, 8
792; GFX8-NEXT:    v_mov_b32_e32 v5, s4
793; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 2, v2
794; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
795; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
796; GFX8-NEXT:    s_waitcnt vmcnt(0)
797; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 8, v0
798; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 8, v1
799; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
800; GFX8-NEXT:    v_lshlrev_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
801; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 24, v0
802; GFX8-NEXT:    v_and_b32_sdwa v11, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
803; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
804; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 24, v1
805; GFX8-NEXT:    v_and_b32_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
806; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
807; GFX8-NEXT:    v_lshlrev_b32_e32 v7, 24, v8
808; GFX8-NEXT:    v_or_b32_e32 v0, v0, v11
809; GFX8-NEXT:    v_lshlrev_b32_e32 v8, 24, v10
810; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
811; GFX8-NEXT:    v_or_b32_e32 v0, v0, v7
812; GFX8-NEXT:    v_or_b32_e32 v1, v1, v8
813; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
814; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
815; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
816; GFX8-NEXT:    s_setpc_b64 s[30:31]
817;
818; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
819; GFX7:       ; %bb.0:
820; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
821; GFX7-NEXT:    s_mov_b32 s6, 0
822; GFX7-NEXT:    s_mov_b32 s7, 0xf000
823; GFX7-NEXT:    s_mov_b64 s[4:5], 0
824; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
825; GFX7-NEXT:    s_movk_i32 s4, 0xff
826; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 2, v2
827; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
828; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
829; GFX7-NEXT:    s_waitcnt vmcnt(0)
830; GFX7-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
831; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 8, v1
832; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
833; GFX7-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
834; GFX7-NEXT:    v_and_b32_e32 v4, s4, v4
835; GFX7-NEXT:    v_and_b32_e32 v7, s4, v7
836; GFX7-NEXT:    v_lshrrev_b32_e32 v6, 24, v0
837; GFX7-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
838; GFX7-NEXT:    v_and_b32_e32 v5, s4, v5
839; GFX7-NEXT:    v_and_b32_e32 v8, s4, v8
840; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
841; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
842; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
843; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 8, v7
844; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
845; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
846; GFX7-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
847; GFX7-NEXT:    v_or_b32_e32 v1, v1, v7
848; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
849; GFX7-NEXT:    v_or_b32_e32 v0, v0, v5
850; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
851; GFX7-NEXT:    v_or_b32_e32 v1, v1, v8
852; GFX7-NEXT:    v_or_b32_e32 v0, v0, v6
853; GFX7-NEXT:    v_or_b32_e32 v1, v1, v9
854; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
855; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
856; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
857; GFX7-NEXT:    s_setpc_b64 s[30:31]
858  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
859  %element = extractelement <8 x i8> %vector, i32 %idx
860  ret i8 %element
861}
862
; Dynamic-index extractelement in an amdgpu_ps function where %ptr is inreg
; and points into addrspace(4), while %idx is not inreg.
; The expected code (one set of checks under the GCN prefix shared by all
; three llc invocations) loads with s_load_dwordx2, repacks both dwords with
; scalar ops, copies them to v2/v3, selects with v_cndmask on idx >> 2 == 1,
; shifts right by (idx & 3) << 3, and moves the result to s0 with
; v_readfirstlane.
863define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
864; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
865; GCN:       ; %bb.0:
866; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
867; GCN-NEXT:    s_movk_i32 s8, 0xff
868; GCN-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
869; GCN-NEXT:    v_and_b32_e32 v0, 3, v0
870; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
871; GCN-NEXT:    s_waitcnt lgkmcnt(0)
872; GCN-NEXT:    s_lshr_b32 s2, s0, 8
873; GCN-NEXT:    s_and_b32 s2, s2, s8
874; GCN-NEXT:    s_lshr_b32 s3, s0, 16
875; GCN-NEXT:    s_lshr_b32 s4, s0, 24
876; GCN-NEXT:    s_and_b32 s0, s0, s8
877; GCN-NEXT:    s_lshl_b32 s2, s2, 8
878; GCN-NEXT:    s_or_b32 s0, s0, s2
879; GCN-NEXT:    s_and_b32 s2, s3, s8
880; GCN-NEXT:    s_lshl_b32 s2, s2, 16
881; GCN-NEXT:    s_or_b32 s0, s0, s2
882; GCN-NEXT:    s_lshl_b32 s2, s4, 24
883; GCN-NEXT:    s_lshr_b32 s5, s1, 8
884; GCN-NEXT:    s_or_b32 s0, s0, s2
885; GCN-NEXT:    s_and_b32 s2, s5, s8
886; GCN-NEXT:    s_lshr_b32 s6, s1, 16
887; GCN-NEXT:    s_lshr_b32 s7, s1, 24
888; GCN-NEXT:    s_and_b32 s1, s1, s8
889; GCN-NEXT:    s_lshl_b32 s2, s2, 8
890; GCN-NEXT:    s_or_b32 s1, s1, s2
891; GCN-NEXT:    s_and_b32 s2, s6, s8
892; GCN-NEXT:    s_lshl_b32 s2, s2, 16
893; GCN-NEXT:    s_or_b32 s1, s1, s2
894; GCN-NEXT:    s_lshl_b32 s2, s7, 24
895; GCN-NEXT:    s_or_b32 s1, s1, s2
896; GCN-NEXT:    v_mov_b32_e32 v2, s0
897; GCN-NEXT:    v_mov_b32_e32 v3, s1
898; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
899; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
900; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
901; GCN-NEXT:    v_readfirstlane_b32 s0, v0
902; GCN-NEXT:    ; return to shader part epilog
903  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
904  %element = extractelement <8 x i8> %vector, i32 %idx
905  ret i8 %element
906}
907
; Constant index 0 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the first
; loaded dword (s0) from its bytes and returns it with no final shift.
908define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) {
909; GCN-LABEL: extractelement_sgpr_v8i8_idx0:
910; GCN:       ; %bb.0:
911; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
912; GCN-NEXT:    s_movk_i32 s4, 0xff
913; GCN-NEXT:    s_waitcnt lgkmcnt(0)
914; GCN-NEXT:    s_lshr_b32 s1, s0, 8
915; GCN-NEXT:    s_and_b32 s1, s1, s4
916; GCN-NEXT:    s_lshr_b32 s2, s0, 16
917; GCN-NEXT:    s_lshr_b32 s3, s0, 24
918; GCN-NEXT:    s_and_b32 s0, s0, s4
919; GCN-NEXT:    s_lshl_b32 s1, s1, 8
920; GCN-NEXT:    s_or_b32 s0, s0, s1
921; GCN-NEXT:    s_and_b32 s1, s2, s4
922; GCN-NEXT:    s_lshl_b32 s1, s1, 16
923; GCN-NEXT:    s_or_b32 s0, s0, s1
924; GCN-NEXT:    s_lshl_b32 s1, s3, 24
925; GCN-NEXT:    s_or_b32 s0, s0, s1
926; GCN-NEXT:    ; return to shader part epilog
927  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
928  %element = extractelement <8 x i8> %vector, i32 0
929  ret i8 %element
930}
931
; Constant index 1 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the first
; loaded dword (s0) from its bytes and then shifts it right by 8.
932define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) {
933; GCN-LABEL: extractelement_sgpr_v8i8_idx1:
934; GCN:       ; %bb.0:
935; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
936; GCN-NEXT:    s_movk_i32 s4, 0xff
937; GCN-NEXT:    s_waitcnt lgkmcnt(0)
938; GCN-NEXT:    s_lshr_b32 s1, s0, 8
939; GCN-NEXT:    s_and_b32 s1, s1, s4
940; GCN-NEXT:    s_lshr_b32 s2, s0, 16
941; GCN-NEXT:    s_lshr_b32 s3, s0, 24
942; GCN-NEXT:    s_and_b32 s0, s0, s4
943; GCN-NEXT:    s_lshl_b32 s1, s1, 8
944; GCN-NEXT:    s_or_b32 s0, s0, s1
945; GCN-NEXT:    s_and_b32 s1, s2, s4
946; GCN-NEXT:    s_lshl_b32 s1, s1, 16
947; GCN-NEXT:    s_or_b32 s0, s0, s1
948; GCN-NEXT:    s_lshl_b32 s1, s3, 24
949; GCN-NEXT:    s_or_b32 s0, s0, s1
950; GCN-NEXT:    s_lshr_b32 s0, s0, 8
951; GCN-NEXT:    ; return to shader part epilog
952  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
953  %element = extractelement <8 x i8> %vector, i32 1
954  ret i8 %element
955}
956
; Constant index 2 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the first
; loaded dword (s0) from its bytes and then shifts it right by 16.
957define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) {
958; GCN-LABEL: extractelement_sgpr_v8i8_idx2:
959; GCN:       ; %bb.0:
960; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
961; GCN-NEXT:    s_movk_i32 s4, 0xff
962; GCN-NEXT:    s_waitcnt lgkmcnt(0)
963; GCN-NEXT:    s_lshr_b32 s1, s0, 8
964; GCN-NEXT:    s_and_b32 s1, s1, s4
965; GCN-NEXT:    s_lshr_b32 s2, s0, 16
966; GCN-NEXT:    s_lshr_b32 s3, s0, 24
967; GCN-NEXT:    s_and_b32 s0, s0, s4
968; GCN-NEXT:    s_lshl_b32 s1, s1, 8
969; GCN-NEXT:    s_or_b32 s0, s0, s1
970; GCN-NEXT:    s_and_b32 s1, s2, s4
971; GCN-NEXT:    s_lshl_b32 s1, s1, 16
972; GCN-NEXT:    s_or_b32 s0, s0, s1
973; GCN-NEXT:    s_lshl_b32 s1, s3, 24
974; GCN-NEXT:    s_or_b32 s0, s0, s1
975; GCN-NEXT:    s_lshr_b32 s0, s0, 16
976; GCN-NEXT:    ; return to shader part epilog
977  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
978  %element = extractelement <8 x i8> %vector, i32 2
979  ret i8 %element
980}
981
; Constant index 3 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the first
; loaded dword (s0) from its bytes and then shifts it right by 24.
982define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) {
983; GCN-LABEL: extractelement_sgpr_v8i8_idx3:
984; GCN:       ; %bb.0:
985; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
986; GCN-NEXT:    s_movk_i32 s4, 0xff
987; GCN-NEXT:    s_waitcnt lgkmcnt(0)
988; GCN-NEXT:    s_lshr_b32 s1, s0, 8
989; GCN-NEXT:    s_and_b32 s1, s1, s4
990; GCN-NEXT:    s_lshr_b32 s2, s0, 16
991; GCN-NEXT:    s_lshr_b32 s3, s0, 24
992; GCN-NEXT:    s_and_b32 s0, s0, s4
993; GCN-NEXT:    s_lshl_b32 s1, s1, 8
994; GCN-NEXT:    s_or_b32 s0, s0, s1
995; GCN-NEXT:    s_and_b32 s1, s2, s4
996; GCN-NEXT:    s_lshl_b32 s1, s1, 16
997; GCN-NEXT:    s_or_b32 s0, s0, s1
998; GCN-NEXT:    s_lshl_b32 s1, s3, 24
999; GCN-NEXT:    s_or_b32 s0, s0, s1
1000; GCN-NEXT:    s_lshr_b32 s0, s0, 24
1001; GCN-NEXT:    ; return to shader part epilog
1002  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1003  %element = extractelement <8 x i8> %vector, i32 3
1004  ret i8 %element
1005}
1006
; Constant index 4 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the second
; loaded dword (s1) from its bytes into s0 and returns it with no final shift.
1007define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) {
1008; GCN-LABEL: extractelement_sgpr_v8i8_idx4:
1009; GCN:       ; %bb.0:
1010; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1011; GCN-NEXT:    s_movk_i32 s4, 0xff
1012; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1013; GCN-NEXT:    s_lshr_b32 s0, s1, 8
1014; GCN-NEXT:    s_and_b32 s0, s0, s4
1015; GCN-NEXT:    s_lshr_b32 s2, s1, 16
1016; GCN-NEXT:    s_lshr_b32 s3, s1, 24
1017; GCN-NEXT:    s_and_b32 s1, s1, s4
1018; GCN-NEXT:    s_lshl_b32 s0, s0, 8
1019; GCN-NEXT:    s_or_b32 s0, s1, s0
1020; GCN-NEXT:    s_and_b32 s1, s2, s4
1021; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1022; GCN-NEXT:    s_or_b32 s0, s0, s1
1023; GCN-NEXT:    s_lshl_b32 s1, s3, 24
1024; GCN-NEXT:    s_or_b32 s0, s0, s1
1025; GCN-NEXT:    ; return to shader part epilog
1026  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1027  %element = extractelement <8 x i8> %vector, i32 4
1028  ret i8 %element
1029}
1030
; Constant index 5 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the second
; loaded dword (s1) from its bytes into s0 and then shifts it right by 8.
1031define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) {
1032; GCN-LABEL: extractelement_sgpr_v8i8_idx5:
1033; GCN:       ; %bb.0:
1034; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1035; GCN-NEXT:    s_movk_i32 s4, 0xff
1036; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1037; GCN-NEXT:    s_lshr_b32 s0, s1, 8
1038; GCN-NEXT:    s_and_b32 s0, s0, s4
1039; GCN-NEXT:    s_lshr_b32 s2, s1, 16
1040; GCN-NEXT:    s_lshr_b32 s3, s1, 24
1041; GCN-NEXT:    s_and_b32 s1, s1, s4
1042; GCN-NEXT:    s_lshl_b32 s0, s0, 8
1043; GCN-NEXT:    s_or_b32 s0, s1, s0
1044; GCN-NEXT:    s_and_b32 s1, s2, s4
1045; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1046; GCN-NEXT:    s_or_b32 s0, s0, s1
1047; GCN-NEXT:    s_lshl_b32 s1, s3, 24
1048; GCN-NEXT:    s_or_b32 s0, s0, s1
1049; GCN-NEXT:    s_lshr_b32 s0, s0, 8
1050; GCN-NEXT:    ; return to shader part epilog
1051  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1052  %element = extractelement <8 x i8> %vector, i32 5
1053  ret i8 %element
1054}
1055
; Constant index 6 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the second
; loaded dword (s1) from its bytes into s0 and then shifts it right by 16.
1056define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) {
1057; GCN-LABEL: extractelement_sgpr_v8i8_idx6:
1058; GCN:       ; %bb.0:
1059; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1060; GCN-NEXT:    s_movk_i32 s4, 0xff
1061; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1062; GCN-NEXT:    s_lshr_b32 s0, s1, 8
1063; GCN-NEXT:    s_and_b32 s0, s0, s4
1064; GCN-NEXT:    s_lshr_b32 s2, s1, 16
1065; GCN-NEXT:    s_lshr_b32 s3, s1, 24
1066; GCN-NEXT:    s_and_b32 s1, s1, s4
1067; GCN-NEXT:    s_lshl_b32 s0, s0, 8
1068; GCN-NEXT:    s_or_b32 s0, s1, s0
1069; GCN-NEXT:    s_and_b32 s1, s2, s4
1070; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1071; GCN-NEXT:    s_or_b32 s0, s0, s1
1072; GCN-NEXT:    s_lshl_b32 s1, s3, 24
1073; GCN-NEXT:    s_or_b32 s0, s0, s1
1074; GCN-NEXT:    s_lshr_b32 s0, s0, 16
1075; GCN-NEXT:    ; return to shader part epilog
1076  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1077  %element = extractelement <8 x i8> %vector, i32 6
1078  ret i8 %element
1079}
1080
; Constant index 7 on a <8 x i8> loaded through an inreg addrspace(4)
; pointer in an amdgpu_ps function. The expected code repacks the second
; loaded dword (s1) from its bytes into s0 and then shifts it right by 24.
1081define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) {
1082; GCN-LABEL: extractelement_sgpr_v8i8_idx7:
1083; GCN:       ; %bb.0:
1084; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1085; GCN-NEXT:    s_movk_i32 s4, 0xff
1086; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1087; GCN-NEXT:    s_lshr_b32 s0, s1, 8
1088; GCN-NEXT:    s_and_b32 s0, s0, s4
1089; GCN-NEXT:    s_lshr_b32 s2, s1, 16
1090; GCN-NEXT:    s_lshr_b32 s3, s1, 24
1091; GCN-NEXT:    s_and_b32 s1, s1, s4
1092; GCN-NEXT:    s_lshl_b32 s0, s0, 8
1093; GCN-NEXT:    s_or_b32 s0, s1, s0
1094; GCN-NEXT:    s_and_b32 s1, s2, s4
1095; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1096; GCN-NEXT:    s_or_b32 s0, s0, s1
1097; GCN-NEXT:    s_lshl_b32 s1, s3, 24
1098; GCN-NEXT:    s_or_b32 s0, s0, s1
1099; GCN-NEXT:    s_lshr_b32 s0, s0, 24
1100; GCN-NEXT:    ; return to shader part epilog
1101  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1102  %element = extractelement <8 x i8> %vector, i32 7
1103  ret i8 %element
1104}
1105
; Constant index 0 on a <8 x i8> loaded from a non-inreg addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention.
; Each target has its own check block. In all of them the first loaded
; dword (v0) is repacked from its bytes and returned with no final shift.
1106define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) {
1107; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
1108; GFX9:       ; %bb.0:
1109; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1110; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1111; GFX9-NEXT:    s_mov_b32 s4, 8
1112; GFX9-NEXT:    s_movk_i32 s5, 0xff
1113; GFX9-NEXT:    s_waitcnt vmcnt(0)
1114; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1115; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1116; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1117; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1118; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
1119; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1120; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1121; GFX9-NEXT:    s_setpc_b64 s[30:31]
1122;
1123; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
1124; GFX8:       ; %bb.0:
1125; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1126; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1127; GFX8-NEXT:    s_movk_i32 s4, 0xff
1128; GFX8-NEXT:    s_waitcnt vmcnt(0)
1129; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1130; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1131; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1132; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1133; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1134; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1135; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1136; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1137; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1138; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1139; GFX8-NEXT:    s_setpc_b64 s[30:31]
1140;
1141; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
1142; GFX7:       ; %bb.0:
1143; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144; GFX7-NEXT:    s_mov_b32 s6, 0
1145; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1146; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1147; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1148; GFX7-NEXT:    s_movk_i32 s4, 0xff
1149; GFX7-NEXT:    s_waitcnt vmcnt(0)
1150; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1151; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1152; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1153; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1154; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1155; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1156; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1157; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1158; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1159; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1160; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1161; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1162; GFX7-NEXT:    s_setpc_b64 s[30:31]
1163  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1164  %element = extractelement <8 x i8> %vector, i32 0
1165  ret i8 %element
1166}
1167
; Constant index 1 on a <8 x i8> loaded from a non-inreg addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention.
; Each target has its own check block. In all of them the first loaded
; dword (v0) is repacked from its bytes and then shifted right by 8.
1168define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) {
1169; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
1170; GFX9:       ; %bb.0:
1171; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1173; GFX9-NEXT:    s_mov_b32 s4, 8
1174; GFX9-NEXT:    s_movk_i32 s5, 0xff
1175; GFX9-NEXT:    s_waitcnt vmcnt(0)
1176; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1177; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1178; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1179; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1180; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
1181; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1182; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1183; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1184; GFX9-NEXT:    s_setpc_b64 s[30:31]
1185;
1186; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
1187; GFX8:       ; %bb.0:
1188; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1189; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1190; GFX8-NEXT:    s_movk_i32 s4, 0xff
1191; GFX8-NEXT:    s_waitcnt vmcnt(0)
1192; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1193; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1194; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1195; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1196; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1197; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1198; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1199; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1200; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1201; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1202; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1203; GFX8-NEXT:    s_setpc_b64 s[30:31]
1204;
1205; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
1206; GFX7:       ; %bb.0:
1207; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1208; GFX7-NEXT:    s_mov_b32 s6, 0
1209; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1210; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1211; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1212; GFX7-NEXT:    s_movk_i32 s4, 0xff
1213; GFX7-NEXT:    s_waitcnt vmcnt(0)
1214; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1215; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1216; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1217; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1218; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1219; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1220; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1221; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1222; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1223; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1224; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1225; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1226; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1227; GFX7-NEXT:    s_setpc_b64 s[30:31]
1228  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1229  %element = extractelement <8 x i8> %vector, i32 1
1230  ret i8 %element
1231}
1232
; Constant index 2 on a <8 x i8> loaded from a non-inreg addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention.
; Each target has its own check block. In all of them the first loaded
; dword (v0) is repacked from its bytes and then shifted right by 16.
1233define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) {
1234; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
1235; GFX9:       ; %bb.0:
1236; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1237; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1238; GFX9-NEXT:    s_mov_b32 s4, 8
1239; GFX9-NEXT:    s_movk_i32 s5, 0xff
1240; GFX9-NEXT:    s_waitcnt vmcnt(0)
1241; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1242; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1243; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1244; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1245; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
1246; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1247; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1248; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1249; GFX9-NEXT:    s_setpc_b64 s[30:31]
1250;
1251; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
1252; GFX8:       ; %bb.0:
1253; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1254; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1255; GFX8-NEXT:    s_movk_i32 s4, 0xff
1256; GFX8-NEXT:    s_waitcnt vmcnt(0)
1257; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1258; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1259; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1260; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1261; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1262; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1263; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1264; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1265; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1266; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1267; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1268; GFX8-NEXT:    s_setpc_b64 s[30:31]
1269;
1270; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
1271; GFX7:       ; %bb.0:
1272; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273; GFX7-NEXT:    s_mov_b32 s6, 0
1274; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1275; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1276; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1277; GFX7-NEXT:    s_movk_i32 s4, 0xff
1278; GFX7-NEXT:    s_waitcnt vmcnt(0)
1279; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1280; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1281; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1282; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1283; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1284; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1285; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1286; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1287; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1288; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1289; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1290; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1291; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1292; GFX7-NEXT:    s_setpc_b64 s[30:31]
1293  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1294  %element = extractelement <8 x i8> %vector, i32 2
1295  ret i8 %element
1296}
1297
; Constant index 3 on a <8 x i8> loaded from a non-inreg addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention.
; Each target has its own check block. In all of them the first loaded
; dword (v0) is repacked from its bytes and then shifted right by 24.
1298define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) {
1299; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
1300; GFX9:       ; %bb.0:
1301; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1303; GFX9-NEXT:    s_mov_b32 s4, 8
1304; GFX9-NEXT:    s_movk_i32 s5, 0xff
1305; GFX9-NEXT:    s_waitcnt vmcnt(0)
1306; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1307; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1308; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1309; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1310; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
1311; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1312; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1313; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1314; GFX9-NEXT:    s_setpc_b64 s[30:31]
1315;
1316; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
1317; GFX8:       ; %bb.0:
1318; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1319; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1320; GFX8-NEXT:    s_movk_i32 s4, 0xff
1321; GFX8-NEXT:    s_waitcnt vmcnt(0)
1322; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1323; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1324; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1325; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1326; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1327; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1328; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1329; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1330; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1331; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1332; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1333; GFX8-NEXT:    s_setpc_b64 s[30:31]
1334;
1335; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
1336; GFX7:       ; %bb.0:
1337; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1338; GFX7-NEXT:    s_mov_b32 s6, 0
1339; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1340; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1341; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1342; GFX7-NEXT:    s_movk_i32 s4, 0xff
1343; GFX7-NEXT:    s_waitcnt vmcnt(0)
1344; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1345; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1346; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1347; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1348; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1349; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1350; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1351; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1352; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1353; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1354; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1355; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1356; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1357; GFX7-NEXT:    s_setpc_b64 s[30:31]
1358  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1359  %element = extractelement <8 x i8> %vector, i32 3
1360  ret i8 %element
1361}
1362
; Constant index 4 on a <8 x i8> loaded from a non-inreg addrspace(1)
; pointer, in a function without the amdgpu_ps calling convention.
; Each target has its own check block. In all of them the second loaded
; dword (v1) is repacked from its bytes into v0 and returned with no final
; shift.
1363define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) {
1364; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
1365; GFX9:       ; %bb.0:
1366; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1368; GFX9-NEXT:    s_mov_b32 s4, 8
1369; GFX9-NEXT:    s_movk_i32 s5, 0xff
1370; GFX9-NEXT:    s_waitcnt vmcnt(0)
1371; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1372; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1373; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
1374; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1375; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
1376; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1377; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1378; GFX9-NEXT:    s_setpc_b64 s[30:31]
1379;
1380; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
1381; GFX8:       ; %bb.0:
1382; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1383; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1384; GFX8-NEXT:    s_movk_i32 s4, 0xff
1385; GFX8-NEXT:    s_waitcnt vmcnt(0)
1386; GFX8-NEXT:    v_mov_b32_e32 v0, 8
1387; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1388; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
1389; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1390; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
1391; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1392; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1393; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1394; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1395; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1396; GFX8-NEXT:    s_setpc_b64 s[30:31]
1397;
1398; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
1399; GFX7:       ; %bb.0:
1400; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1401; GFX7-NEXT:    s_mov_b32 s6, 0
1402; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1403; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1404; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1405; GFX7-NEXT:    s_movk_i32 s4, 0xff
1406; GFX7-NEXT:    s_waitcnt vmcnt(0)
1407; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1408; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
1409; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1410; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1411; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1412; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1413; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
1414; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
1415; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1416; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1417; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1418; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1419; GFX7-NEXT:    s_setpc_b64 s[30:31]
1420  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1421  %element = extractelement <8 x i8> %vector, i32 4
1422  ret i8 %element
1423}
1424
; Test: extractelement with constant index 5 from an <8 x i8> vector loaded
; through the addrspace(1) pointer %ptr; the i8 element is returned.
; The GFX9/GFX8/GFX7 check lines are the autogenerated expected llc output
; (utils/update_llc_test_checks.py) for the gfx900, fiji and hawaii RUN lines
; at the top of the file; regenerate them instead of editing them by hand.
; In the expected code the bytes are repacked from v1 of the loaded v[0:1]
; pair, and the result is then shifted right by 8 on every target.
1425define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) {
1426; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
1427; GFX9:       ; %bb.0:
1428; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1430; GFX9-NEXT:    s_mov_b32 s4, 8
1431; GFX9-NEXT:    s_movk_i32 s5, 0xff
1432; GFX9-NEXT:    s_waitcnt vmcnt(0)
1433; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1434; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1435; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
1436; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1437; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
1438; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1439; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1440; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1441; GFX9-NEXT:    s_setpc_b64 s[30:31]
1442;
1443; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
1444; GFX8:       ; %bb.0:
1445; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1447; GFX8-NEXT:    s_movk_i32 s4, 0xff
1448; GFX8-NEXT:    s_waitcnt vmcnt(0)
1449; GFX8-NEXT:    v_mov_b32_e32 v0, 8
1450; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1451; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
1452; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1453; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
1454; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1455; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1456; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1457; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1458; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1459; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1460; GFX8-NEXT:    s_setpc_b64 s[30:31]
1461;
1462; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
1463; GFX7:       ; %bb.0:
1464; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1465; GFX7-NEXT:    s_mov_b32 s6, 0
1466; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1467; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1468; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1469; GFX7-NEXT:    s_movk_i32 s4, 0xff
1470; GFX7-NEXT:    s_waitcnt vmcnt(0)
1471; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1472; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
1473; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1474; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1475; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1476; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1477; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
1478; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
1479; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1480; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1481; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1482; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1483; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1484; GFX7-NEXT:    s_setpc_b64 s[30:31]
1485  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1486  %element = extractelement <8 x i8> %vector, i32 5
1487  ret i8 %element
1488}
1489
; Test: extractelement with constant index 6 from an <8 x i8> vector loaded
; through the addrspace(1) pointer %ptr; the i8 element is returned.
; The GFX9/GFX8/GFX7 check lines are the autogenerated expected llc output
; (utils/update_llc_test_checks.py) for the gfx900, fiji and hawaii RUN lines
; at the top of the file; regenerate them instead of editing them by hand.
; In the expected code the bytes are repacked from v1 of the loaded v[0:1]
; pair, and the result is then shifted right by 16 on every target.
1490define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) {
1491; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
1492; GFX9:       ; %bb.0:
1493; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1494; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1495; GFX9-NEXT:    s_mov_b32 s4, 8
1496; GFX9-NEXT:    s_movk_i32 s5, 0xff
1497; GFX9-NEXT:    s_waitcnt vmcnt(0)
1498; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1499; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1500; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
1501; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1502; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
1503; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1504; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1505; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1506; GFX9-NEXT:    s_setpc_b64 s[30:31]
1507;
1508; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
1509; GFX8:       ; %bb.0:
1510; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1511; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1512; GFX8-NEXT:    s_movk_i32 s4, 0xff
1513; GFX8-NEXT:    s_waitcnt vmcnt(0)
1514; GFX8-NEXT:    v_mov_b32_e32 v0, 8
1515; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1516; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
1517; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1518; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
1519; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1520; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1521; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1522; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1523; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1524; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1525; GFX8-NEXT:    s_setpc_b64 s[30:31]
1526;
1527; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
1528; GFX7:       ; %bb.0:
1529; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530; GFX7-NEXT:    s_mov_b32 s6, 0
1531; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1532; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1533; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1534; GFX7-NEXT:    s_movk_i32 s4, 0xff
1535; GFX7-NEXT:    s_waitcnt vmcnt(0)
1536; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1537; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
1538; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1539; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1540; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1541; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1542; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
1543; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
1544; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1545; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1546; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1547; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1548; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1549; GFX7-NEXT:    s_setpc_b64 s[30:31]
1550  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1551  %element = extractelement <8 x i8> %vector, i32 6
1552  ret i8 %element
1553}
1554
; Test: extractelement with constant index 7 from an <8 x i8> vector loaded
; through the addrspace(1) pointer %ptr; the i8 element is returned.
; The GFX9/GFX8/GFX7 check lines are the autogenerated expected llc output
; (utils/update_llc_test_checks.py) for the gfx900, fiji and hawaii RUN lines
; at the top of the file; regenerate them instead of editing them by hand.
; In the expected code the bytes are repacked from v1 of the loaded v[0:1]
; pair, and the result is then shifted right by 24 on every target.
1555define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) {
1556; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
1557; GFX9:       ; %bb.0:
1558; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1559; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1560; GFX9-NEXT:    s_mov_b32 s4, 8
1561; GFX9-NEXT:    s_movk_i32 s5, 0xff
1562; GFX9-NEXT:    s_waitcnt vmcnt(0)
1563; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1564; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1565; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
1566; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1567; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
1568; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1569; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1570; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1571; GFX9-NEXT:    s_setpc_b64 s[30:31]
1572;
1573; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
1574; GFX8:       ; %bb.0:
1575; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1576; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1577; GFX8-NEXT:    s_movk_i32 s4, 0xff
1578; GFX8-NEXT:    s_waitcnt vmcnt(0)
1579; GFX8-NEXT:    v_mov_b32_e32 v0, 8
1580; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1581; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
1582; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1583; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
1584; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1585; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1586; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1587; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1588; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1589; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1590; GFX8-NEXT:    s_setpc_b64 s[30:31]
1591;
1592; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
1593; GFX7:       ; %bb.0:
1594; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1595; GFX7-NEXT:    s_mov_b32 s6, 0
1596; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1597; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1598; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1599; GFX7-NEXT:    s_movk_i32 s4, 0xff
1600; GFX7-NEXT:    s_waitcnt vmcnt(0)
1601; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
1602; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
1603; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
1604; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
1605; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1606; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1607; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
1608; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
1609; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1610; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1611; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1612; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1613; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1614; GFX7-NEXT:    s_setpc_b64 s[30:31]
1615  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1616  %element = extractelement <8 x i8> %vector, i32 7
1617  ret i8 %element
1618}
1619
; Test: extractelement with a dynamic index from a <16 x i8> vector loaded
; through the addrspace(4) pointer %ptr. The function uses the amdgpu_ps
; calling convention and both %ptr and %idx are marked inreg.
; A single GCN check block is shared by all three RUN lines (gfx900, fiji,
; hawaii). The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
; In the expected code each loaded dword s0..s3 is repacked byte by byte,
; idx >> 2 (s5) picks the dword through an s_cmp_eq_u32/s_cselect_b32 chain,
; and (idx & 3) << 3 is the final right-shift amount.
1620define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
1621; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1622; GCN:       ; %bb.0:
1623; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
1624; GCN-NEXT:    s_movk_i32 s17, 0xff
1625; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1626; GCN-NEXT:    s_lshr_b32 s5, s0, 8
1627; GCN-NEXT:    s_and_b32 s5, s5, s17
1628; GCN-NEXT:    s_lshr_b32 s6, s0, 16
1629; GCN-NEXT:    s_lshr_b32 s7, s0, 24
1630; GCN-NEXT:    s_and_b32 s0, s0, s17
1631; GCN-NEXT:    s_lshl_b32 s5, s5, 8
1632; GCN-NEXT:    s_or_b32 s0, s0, s5
1633; GCN-NEXT:    s_and_b32 s5, s6, s17
1634; GCN-NEXT:    s_lshl_b32 s5, s5, 16
1635; GCN-NEXT:    s_or_b32 s0, s0, s5
1636; GCN-NEXT:    s_lshl_b32 s5, s7, 24
1637; GCN-NEXT:    s_lshr_b32 s8, s1, 8
1638; GCN-NEXT:    s_or_b32 s0, s0, s5
1639; GCN-NEXT:    s_and_b32 s5, s8, s17
1640; GCN-NEXT:    s_lshr_b32 s9, s1, 16
1641; GCN-NEXT:    s_lshr_b32 s10, s1, 24
1642; GCN-NEXT:    s_and_b32 s1, s1, s17
1643; GCN-NEXT:    s_lshl_b32 s5, s5, 8
1644; GCN-NEXT:    s_or_b32 s1, s1, s5
1645; GCN-NEXT:    s_and_b32 s5, s9, s17
1646; GCN-NEXT:    s_lshl_b32 s5, s5, 16
1647; GCN-NEXT:    s_or_b32 s1, s1, s5
1648; GCN-NEXT:    s_lshl_b32 s5, s10, 24
1649; GCN-NEXT:    s_lshr_b32 s11, s2, 8
1650; GCN-NEXT:    s_or_b32 s1, s1, s5
1651; GCN-NEXT:    s_and_b32 s5, s11, s17
1652; GCN-NEXT:    s_lshr_b32 s12, s2, 16
1653; GCN-NEXT:    s_lshr_b32 s13, s2, 24
1654; GCN-NEXT:    s_and_b32 s2, s2, s17
1655; GCN-NEXT:    s_lshl_b32 s5, s5, 8
1656; GCN-NEXT:    s_or_b32 s2, s2, s5
1657; GCN-NEXT:    s_and_b32 s5, s12, s17
1658; GCN-NEXT:    s_lshl_b32 s5, s5, 16
1659; GCN-NEXT:    s_or_b32 s2, s2, s5
1660; GCN-NEXT:    s_lshl_b32 s5, s13, 24
1661; GCN-NEXT:    s_lshr_b32 s14, s3, 8
1662; GCN-NEXT:    s_or_b32 s2, s2, s5
1663; GCN-NEXT:    s_and_b32 s5, s14, s17
1664; GCN-NEXT:    s_lshr_b32 s15, s3, 16
1665; GCN-NEXT:    s_lshr_b32 s16, s3, 24
1666; GCN-NEXT:    s_and_b32 s3, s3, s17
1667; GCN-NEXT:    s_lshl_b32 s5, s5, 8
1668; GCN-NEXT:    s_or_b32 s3, s3, s5
1669; GCN-NEXT:    s_and_b32 s5, s15, s17
1670; GCN-NEXT:    s_lshl_b32 s5, s5, 16
1671; GCN-NEXT:    s_or_b32 s3, s3, s5
1672; GCN-NEXT:    s_lshl_b32 s5, s16, 24
1673; GCN-NEXT:    s_or_b32 s3, s3, s5
1674; GCN-NEXT:    s_lshr_b32 s5, s4, 2
1675; GCN-NEXT:    s_cmp_eq_u32 s5, 1
1676; GCN-NEXT:    s_cselect_b32 s0, s1, s0
1677; GCN-NEXT:    s_cmp_eq_u32 s5, 2
1678; GCN-NEXT:    s_cselect_b32 s0, s2, s0
1679; GCN-NEXT:    s_cmp_eq_u32 s5, 3
1680; GCN-NEXT:    s_cselect_b32 s0, s3, s0
1681; GCN-NEXT:    s_and_b32 s1, s4, 3
1682; GCN-NEXT:    s_lshl_b32 s1, s1, 3
1683; GCN-NEXT:    s_lshr_b32 s0, s0, s1
1684; GCN-NEXT:    ; return to shader part epilog
1685  %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
1686  %element = extractelement <16 x i8> %vector, i32 %idx
1687  ret i8 %element
1688}
1689
; Test: extractelement with a dynamic index from a <16 x i8> vector loaded
; through the addrspace(1) pointer %ptr. The function uses the amdgpu_ps
; calling convention; only %idx is marked inreg, %ptr is not.
; The GFX9/GFX8/GFX7 check lines are the autogenerated expected llc output
; (utils/update_llc_test_checks.py) for the gfx900, fiji and hawaii RUN lines
; at the top of the file; regenerate them instead of editing them by hand.
; In the expected code idx >> 2 is computed with s_lshr_b32 and drives a
; v_cmp_eq_u32/v_cndmask_b32 chain over the four repacked dwords,
; (idx & 3) << 3 is the final right-shift amount, and the result is copied
; to s0 with v_readfirstlane_b32.
1690define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
1691; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1692; GFX9:       ; %bb.0:
1693; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
1694; GFX9-NEXT:    s_mov_b32 s0, 8
1695; GFX9-NEXT:    v_mov_b32_e32 v5, 8
1696; GFX9-NEXT:    s_movk_i32 s1, 0xff
1697; GFX9-NEXT:    s_lshr_b32 s3, s2, 2
1698; GFX9-NEXT:    v_mov_b32_e32 v4, 0xff
1699; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 1
1700; GFX9-NEXT:    s_and_b32 s2, s2, 3
1701; GFX9-NEXT:    s_waitcnt vmcnt(0)
1702; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 8, v0
1703; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
1704; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v0
1705; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
1706; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 8, v2
1707; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1708; GFX9-NEXT:    v_lshlrev_b32_sdwa v8, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1709; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
1710; GFX9-NEXT:    v_and_b32_sdwa v14, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1711; GFX9-NEXT:    v_and_b32_sdwa v15, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1712; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 8, v3
1713; GFX9-NEXT:    v_lshlrev_b32_sdwa v10, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1714; GFX9-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
1715; GFX9-NEXT:    v_and_or_b32 v0, v0, s1, v6
1716; GFX9-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
1717; GFX9-NEXT:    v_and_or_b32 v1, v1, s1, v8
1718; GFX9-NEXT:    v_and_b32_sdwa v16, v2, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1719; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 24, v3
1720; GFX9-NEXT:    v_lshlrev_b32_sdwa v5, v5, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1721; GFX9-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
1722; GFX9-NEXT:    v_and_or_b32 v2, v2, s1, v10
1723; GFX9-NEXT:    v_or3_b32 v0, v0, v14, v7
1724; GFX9-NEXT:    v_or3_b32 v1, v1, v15, v9
1725; GFX9-NEXT:    v_and_b32_sdwa v17, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1726; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1727; GFX9-NEXT:    v_lshlrev_b32_e32 v12, 24, v13
1728; GFX9-NEXT:    v_and_or_b32 v3, v3, v4, v5
1729; GFX9-NEXT:    v_or3_b32 v2, v2, v16, v11
1730; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 2
1731; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1732; GFX9-NEXT:    v_or3_b32 v3, v3, v17, v12
1733; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 3
1734; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1735; GFX9-NEXT:    s_lshl_b32 s0, s2, 3
1736; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1737; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1738; GFX9-NEXT:    ; return to shader part epilog
1739;
1740; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1741; GFX8:       ; %bb.0:
1742; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
1743; GFX8-NEXT:    s_movk_i32 s0, 0xff
1744; GFX8-NEXT:    v_mov_b32_e32 v5, 8
1745; GFX8-NEXT:    v_mov_b32_e32 v6, 8
1746; GFX8-NEXT:    v_mov_b32_e32 v7, s0
1747; GFX8-NEXT:    v_mov_b32_e32 v4, 0xff
1748; GFX8-NEXT:    s_lshr_b32 s0, s2, 2
1749; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
1750; GFX8-NEXT:    s_and_b32 s1, s2, 3
1751; GFX8-NEXT:    s_waitcnt vmcnt(0)
1752; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 8, v0
1753; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 24, v0
1754; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 8, v1
1755; GFX8-NEXT:    v_lshlrev_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1756; GFX8-NEXT:    v_lshlrev_b32_e32 v8, 24, v9
1757; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 24, v1
1758; GFX8-NEXT:    v_lshlrev_b32_sdwa v9, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1759; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 8, v2
1760; GFX8-NEXT:    v_and_b32_sdwa v16, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1761; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1762; GFX8-NEXT:    v_and_b32_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1763; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1764; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 24, v11
1765; GFX8-NEXT:    v_lshrrev_b32_e32 v14, 8, v3
1766; GFX8-NEXT:    v_lshlrev_b32_sdwa v11, v6, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1767; GFX8-NEXT:    v_or_b32_e32 v0, v0, v16
1768; GFX8-NEXT:    v_or_b32_e32 v1, v1, v7
1769; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 24, v2
1770; GFX8-NEXT:    v_and_b32_sdwa v17, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1771; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1772; GFX8-NEXT:    v_lshlrev_b32_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1773; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
1774; GFX8-NEXT:    v_and_b32_sdwa v4, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1775; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1776; GFX8-NEXT:    v_lshlrev_b32_e32 v12, 24, v13
1777; GFX8-NEXT:    v_or_b32_e32 v2, v2, v17
1778; GFX8-NEXT:    v_or_b32_e32 v0, v0, v8
1779; GFX8-NEXT:    v_or_b32_e32 v1, v1, v10
1780; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1781; GFX8-NEXT:    v_lshlrev_b32_e32 v13, 24, v15
1782; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
1783; GFX8-NEXT:    v_or_b32_e32 v2, v2, v12
1784; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
1785; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1786; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
1787; GFX8-NEXT:    v_or_b32_e32 v3, v3, v13
1788; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1789; GFX8-NEXT:    s_lshl_b32 s0, s1, 3
1790; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1791; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1792; GFX8-NEXT:    ; return to shader part epilog
1793;
1794; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1795; GFX7:       ; %bb.0:
1796; GFX7-NEXT:    s_mov_b32 s6, 0
1797; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1798; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1799; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
1800; GFX7-NEXT:    s_movk_i32 s0, 0xff
1801; GFX7-NEXT:    v_mov_b32_e32 v4, 0xff
1802; GFX7-NEXT:    s_lshr_b32 s1, s2, 2
1803; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 1
1804; GFX7-NEXT:    s_and_b32 s2, s2, 3
1805; GFX7-NEXT:    s_waitcnt vmcnt(0)
1806; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 8, v0
1807; GFX7-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
1808; GFX7-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1809; GFX7-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
1810; GFX7-NEXT:    v_lshrrev_b32_e32 v11, 8, v2
1811; GFX7-NEXT:    v_and_b32_e32 v5, s0, v5
1812; GFX7-NEXT:    v_and_b32_e32 v8, s0, v8
1813; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v0
1814; GFX7-NEXT:    v_lshrrev_b32_e32 v10, 24, v1
1815; GFX7-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
1816; GFX7-NEXT:    v_lshrrev_b32_e32 v14, 8, v3
1817; GFX7-NEXT:    v_and_b32_e32 v6, s0, v6
1818; GFX7-NEXT:    v_and_b32_e32 v9, s0, v9
1819; GFX7-NEXT:    v_and_b32_e32 v11, s0, v11
1820; GFX7-NEXT:    v_and_b32_e32 v0, s0, v0
1821; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
1822; GFX7-NEXT:    v_and_b32_e32 v1, s0, v1
1823; GFX7-NEXT:    v_lshlrev_b32_e32 v8, 8, v8
1824; GFX7-NEXT:    v_lshrrev_b32_e32 v13, 24, v2
1825; GFX7-NEXT:    v_and_b32_e32 v12, v12, v4
1826; GFX7-NEXT:    v_and_b32_e32 v14, v14, v4
1827; GFX7-NEXT:    v_and_b32_e32 v2, s0, v2
1828; GFX7-NEXT:    v_lshlrev_b32_e32 v11, 8, v11
1829; GFX7-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
1830; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
1831; GFX7-NEXT:    v_or_b32_e32 v0, v0, v5
1832; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
1833; GFX7-NEXT:    v_or_b32_e32 v1, v1, v8
1834; GFX7-NEXT:    v_lshrrev_b32_e32 v16, 24, v3
1835; GFX7-NEXT:    v_and_b32_e32 v3, v3, v4
1836; GFX7-NEXT:    v_and_b32_e32 v4, v15, v4
1837; GFX7-NEXT:    v_lshlrev_b32_e32 v14, 8, v14
1838; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
1839; GFX7-NEXT:    v_or_b32_e32 v0, v0, v6
1840; GFX7-NEXT:    v_lshlrev_b32_e32 v10, 24, v10
1841; GFX7-NEXT:    v_or_b32_e32 v1, v1, v9
1842; GFX7-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
1843; GFX7-NEXT:    v_or_b32_e32 v2, v2, v11
1844; GFX7-NEXT:    v_lshlrev_b32_e32 v13, 24, v13
1845; GFX7-NEXT:    v_or_b32_e32 v2, v2, v12
1846; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
1847; GFX7-NEXT:    v_or_b32_e32 v3, v3, v14
1848; GFX7-NEXT:    v_or_b32_e32 v0, v0, v7
1849; GFX7-NEXT:    v_or_b32_e32 v1, v1, v10
1850; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1851; GFX7-NEXT:    v_lshlrev_b32_e32 v15, 24, v16
1852; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
1853; GFX7-NEXT:    v_or_b32_e32 v2, v2, v13
1854; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 2
1855; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1856; GFX7-NEXT:    v_or_b32_e32 v3, v3, v15
1857; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 3
1858; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1859; GFX7-NEXT:    s_lshl_b32 s0, s2, 3
1860; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1861; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1862; GFX7-NEXT:    ; return to shader part epilog
1863  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
1864  %element = extractelement <16 x i8> %vector, i32 %idx
1865  ret i8 %element
1866}
1867
; Test: extractelement with a dynamic index from a <16 x i8> vector loaded
; through the addrspace(1) pointer %ptr. Neither %ptr nor %idx is marked
; inreg, and the function has no explicit calling convention.
; The GFX9/GFX8/GFX7 check lines are the autogenerated expected llc output
; (utils/update_llc_test_checks.py) for the gfx900, fiji and hawaii RUN lines
; at the top of the file; regenerate them instead of editing them by hand.
; In the expected code idx >> 2 is computed with v_lshrrev_b32 and drives a
; v_cmp_eq_u32/v_cndmask_b32 chain over the four repacked dwords, and
; (idx & 3) << 3 is the final right-shift amount; the function returns via
; s_setpc_b64.
1868define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) {
1869; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1870; GFX9:       ; %bb.0:
1871; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1872; GFX9-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
1873; GFX9-NEXT:    s_mov_b32 s4, 8
1874; GFX9-NEXT:    v_mov_b32_e32 v1, 8
1875; GFX9-NEXT:    s_movk_i32 s5, 0xff
1876; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 2, v2
1877; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
1878; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
1879; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
1880; GFX9-NEXT:    s_waitcnt vmcnt(0)
1881; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 8, v3
1882; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 8, v4
1883; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 24, v3
1884; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
1885; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 8, v5
1886; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 8, v6
1887; GFX9-NEXT:    v_lshlrev_b32_sdwa v8, s4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1888; GFX9-NEXT:    v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1889; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 24, v5
1890; GFX9-NEXT:    v_and_b32_sdwa v16, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1891; GFX9-NEXT:    v_and_b32_sdwa v17, v4, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1892; GFX9-NEXT:    v_lshlrev_b32_sdwa v12, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1893; GFX9-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
1894; GFX9-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
1895; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1896; GFX9-NEXT:    v_and_or_b32 v3, v3, s5, v8
1897; GFX9-NEXT:    v_and_or_b32 v4, v4, s5, v10
1898; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 24, v6
1899; GFX9-NEXT:    v_and_b32_sdwa v18, v5, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1900; GFX9-NEXT:    v_and_b32_sdwa v19, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1901; GFX9-NEXT:    v_and_or_b32 v0, v6, v0, v1
1902; GFX9-NEXT:    v_or3_b32 v1, v3, v16, v9
1903; GFX9-NEXT:    v_or3_b32 v3, v4, v17, v11
1904; GFX9-NEXT:    v_lshlrev_b32_e32 v13, 24, v13
1905; GFX9-NEXT:    v_and_or_b32 v5, v5, s5, v12
1906; GFX9-NEXT:    v_lshlrev_b32_e32 v14, 24, v15
1907; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1908; GFX9-NEXT:    v_or3_b32 v4, v5, v18, v13
1909; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v7
1910; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1911; GFX9-NEXT:    v_or3_b32 v0, v0, v19, v14
1912; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
1913; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1914; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1915; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1916; GFX9-NEXT:    s_setpc_b64 s[30:31]
1917;
1918; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1919; GFX8:       ; %bb.0:
1920; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1921; GFX8-NEXT:    flat_load_dwordx4 v[3:6], v[0:1]
1922; GFX8-NEXT:    s_movk_i32 s4, 0xff
1923; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1924; GFX8-NEXT:    v_mov_b32_e32 v7, 8
1925; GFX8-NEXT:    v_mov_b32_e32 v8, s4
1926; GFX8-NEXT:    v_mov_b32_e32 v0, 0xff
1927; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 2, v2
1928; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
1929; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
1930; GFX8-NEXT:    s_waitcnt vmcnt(0)
1931; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 8, v3
1932; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 24, v3
1933; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 8, v4
1934; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1935; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 24, v11
1936; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 24, v4
1937; GFX8-NEXT:    v_lshlrev_b32_sdwa v11, v7, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1938; GFX8-NEXT:    v_lshrrev_b32_e32 v14, 8, v5
1939; GFX8-NEXT:    v_and_b32_sdwa v18, v3, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1940; GFX8-NEXT:    v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1941; GFX8-NEXT:    v_lshlrev_b32_e32 v12, 24, v13
1942; GFX8-NEXT:    v_lshrrev_b32_e32 v16, 8, v6
1943; GFX8-NEXT:    v_lshlrev_b32_sdwa v13, v7, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1944; GFX8-NEXT:    v_and_b32_sdwa v8, v4, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1945; GFX8-NEXT:    v_or_b32_sdwa v3, v4, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1946; GFX8-NEXT:    v_or_b32_e32 v1, v1, v18
1947; GFX8-NEXT:    v_or_b32_e32 v3, v3, v8
1948; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 24, v5
1949; GFX8-NEXT:    v_and_b32_sdwa v19, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1950; GFX8-NEXT:    v_or_b32_sdwa v4, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1951; GFX8-NEXT:    v_lshlrev_b32_sdwa v7, v7, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1952; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 24, v6
1953; GFX8-NEXT:    v_lshlrev_b32_e32 v14, 24, v15
1954; GFX8-NEXT:    v_or_b32_e32 v4, v4, v19
1955; GFX8-NEXT:    v_and_b32_sdwa v0, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1956; GFX8-NEXT:    v_or_b32_sdwa v5, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1957; GFX8-NEXT:    v_or_b32_e32 v1, v1, v10
1958; GFX8-NEXT:    v_or_b32_e32 v3, v3, v12
1959; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1960; GFX8-NEXT:    v_lshlrev_b32_e32 v15, 24, v17
1961; GFX8-NEXT:    v_or_b32_e32 v0, v5, v0
1962; GFX8-NEXT:    v_or_b32_e32 v4, v4, v14
1963; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
1964; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1965; GFX8-NEXT:    v_or_b32_e32 v0, v0, v15
1966; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
1967; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1968; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1969; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1970; GFX8-NEXT:    s_setpc_b64 s[30:31]
1971;
1972; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1973; GFX7:       ; %bb.0:
1974; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1975; GFX7-NEXT:    s_mov_b32 s6, 0
1976; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1977; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1978; GFX7-NEXT:    buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64
1979; GFX7-NEXT:    s_movk_i32 s4, 0xff
1980; GFX7-NEXT:    v_mov_b32_e32 v0, 0xff
1981; GFX7-NEXT:    v_lshrrev_b32_e32 v18, 2, v2
1982; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v18
1983; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
1984; GFX7-NEXT:    s_waitcnt vmcnt(0)
1985; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v3
1986; GFX7-NEXT:    v_lshrrev_b32_e32 v9, 8, v4
1987; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
1988; GFX7-NEXT:    v_lshrrev_b32_e32 v10, 16, v4
1989; GFX7-NEXT:    v_lshrrev_b32_e32 v12, 8, v5
1990; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
1991; GFX7-NEXT:    v_and_b32_e32 v9, s4, v9
1992; GFX7-NEXT:    v_lshrrev_b32_e32 v8, 24, v3
1993; GFX7-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
1994; GFX7-NEXT:    v_lshrrev_b32_e32 v13, 16, v5
1995; GFX7-NEXT:    v_lshrrev_b32_e32 v15, 8, v6
1996; GFX7-NEXT:    v_and_b32_e32 v7, s4, v7
1997; GFX7-NEXT:    v_and_b32_e32 v10, s4, v10
1998; GFX7-NEXT:    v_and_b32_e32 v12, s4, v12
1999; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
2000; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
2001; GFX7-NEXT:    v_and_b32_e32 v4, s4, v4
2002; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
2003; GFX7-NEXT:    v_lshrrev_b32_e32 v14, 24, v5
2004; GFX7-NEXT:    v_and_b32_e32 v13, v13, v0
2005; GFX7-NEXT:    v_and_b32_e32 v15, v15, v0
2006; GFX7-NEXT:    v_or_b32_e32 v1, v3, v1
2007; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
2008; GFX7-NEXT:    v_lshrrev_b32_e32 v16, 16, v6
2009; GFX7-NEXT:    v_and_b32_e32 v5, s4, v5
2010; GFX7-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
2011; GFX7-NEXT:    v_or_b32_e32 v3, v4, v9
2012; GFX7-NEXT:    v_lshlrev_b32_e32 v12, 8, v12
2013; GFX7-NEXT:    v_lshrrev_b32_e32 v17, 24, v6
2014; GFX7-NEXT:    v_and_b32_e32 v6, v6, v0
2015; GFX7-NEXT:    v_and_b32_e32 v0, v16, v0
2016; GFX7-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
2017; GFX7-NEXT:    v_or_b32_e32 v1, v1, v7
2018; GFX7-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
2019; GFX7-NEXT:    v_or_b32_e32 v3, v3, v10
2020; GFX7-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
2021; GFX7-NEXT:    v_or_b32_e32 v4, v5, v12
2022; GFX7-NEXT:    v_lshlrev_b32_e32 v15, 8, v15
2023; GFX7-NEXT:    v_lshlrev_b32_e32 v14, 24, v14
2024; GFX7-NEXT:    v_or_b32_e32 v4, v4, v13
2025; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2026; GFX7-NEXT:    v_or_b32_e32 v5, v6, v15
2027; GFX7-NEXT:    v_or_b32_e32 v1, v1, v8
2028; GFX7-NEXT:    v_or_b32_e32 v3, v3, v11
2029; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2030; GFX7-NEXT:    v_lshlrev_b32_e32 v16, 24, v17
2031; GFX7-NEXT:    v_or_b32_e32 v0, v5, v0
2032; GFX7-NEXT:    v_or_b32_e32 v4, v4, v14
2033; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v18
2034; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2035; GFX7-NEXT:    v_or_b32_e32 v0, v0, v16
2036; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v18
2037; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2038; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
2039; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
2040; GFX7-NEXT:    s_setpc_b64 s[30:31]
2041  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2042  %element = extractelement <16 x i8> %vector, i32 %idx
2043  ret i8 %element
2044}
2045
; Dynamic-index extract from a <16 x i8> loaded through an inreg addrspace(4)
; pointer, with the i32 index passed as a plain (non-inreg) argument.
; The checks use the shared GCN prefix, so one sequence is expected for all
; three RUN lines: each of the four loaded dwords (s0..s3) is rebuilt from its
; bytes with scalar shift/and/or, idx >> 2 picks one dword via a
; v_cmp_eq_u32 / v_cndmask_b32 chain, and (idx & 3) << 3 is the right-shift
; amount that moves the requested byte to bit 0. v_readfirstlane_b32 then
; copies the result into s0. No final mask to 8 bits appears in the checks.
2046define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
2047; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
2048; GCN:       ; %bb.0:
2049; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
2050; GCN-NEXT:    s_movk_i32 s16, 0xff
2051; GCN-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
2052; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2053; GCN-NEXT:    v_and_b32_e32 v0, 3, v0
2054; GCN-NEXT:    s_waitcnt lgkmcnt(0)
2055; GCN-NEXT:    s_lshr_b32 s4, s0, 8
2056; GCN-NEXT:    s_and_b32 s4, s4, s16
2057; GCN-NEXT:    s_lshr_b32 s5, s0, 16
2058; GCN-NEXT:    s_lshr_b32 s6, s0, 24
2059; GCN-NEXT:    s_and_b32 s0, s0, s16
2060; GCN-NEXT:    s_lshl_b32 s4, s4, 8
2061; GCN-NEXT:    s_or_b32 s0, s0, s4
2062; GCN-NEXT:    s_and_b32 s4, s5, s16
2063; GCN-NEXT:    s_lshl_b32 s4, s4, 16
2064; GCN-NEXT:    s_or_b32 s0, s0, s4
2065; GCN-NEXT:    s_lshl_b32 s4, s6, 24
2066; GCN-NEXT:    s_lshr_b32 s7, s1, 8
2067; GCN-NEXT:    s_or_b32 s0, s0, s4
2068; GCN-NEXT:    s_and_b32 s4, s7, s16
2069; GCN-NEXT:    s_lshr_b32 s8, s1, 16
2070; GCN-NEXT:    s_lshr_b32 s9, s1, 24
2071; GCN-NEXT:    s_and_b32 s1, s1, s16
2072; GCN-NEXT:    s_lshl_b32 s4, s4, 8
2073; GCN-NEXT:    s_or_b32 s1, s1, s4
2074; GCN-NEXT:    s_and_b32 s4, s8, s16
2075; GCN-NEXT:    s_lshl_b32 s4, s4, 16
2076; GCN-NEXT:    s_or_b32 s1, s1, s4
2077; GCN-NEXT:    s_lshl_b32 s4, s9, 24
2078; GCN-NEXT:    s_lshr_b32 s10, s2, 8
2079; GCN-NEXT:    s_or_b32 s1, s1, s4
2080; GCN-NEXT:    s_and_b32 s4, s10, s16
2081; GCN-NEXT:    s_lshr_b32 s11, s2, 16
2082; GCN-NEXT:    s_lshr_b32 s12, s2, 24
2083; GCN-NEXT:    s_and_b32 s2, s2, s16
2084; GCN-NEXT:    s_lshl_b32 s4, s4, 8
2085; GCN-NEXT:    s_or_b32 s2, s2, s4
2086; GCN-NEXT:    s_and_b32 s4, s11, s16
2087; GCN-NEXT:    s_lshl_b32 s4, s4, 16
2088; GCN-NEXT:    s_or_b32 s2, s2, s4
2089; GCN-NEXT:    s_lshl_b32 s4, s12, 24
2090; GCN-NEXT:    s_lshr_b32 s13, s3, 8
2091; GCN-NEXT:    s_or_b32 s2, s2, s4
2092; GCN-NEXT:    s_and_b32 s4, s13, s16
2093; GCN-NEXT:    s_lshr_b32 s14, s3, 16
2094; GCN-NEXT:    s_lshr_b32 s15, s3, 24
2095; GCN-NEXT:    s_and_b32 s3, s3, s16
2096; GCN-NEXT:    s_lshl_b32 s4, s4, 8
2097; GCN-NEXT:    s_or_b32 s3, s3, s4
2098; GCN-NEXT:    s_and_b32 s4, s14, s16
2099; GCN-NEXT:    s_lshl_b32 s4, s4, 16
2100; GCN-NEXT:    s_or_b32 s3, s3, s4
2101; GCN-NEXT:    s_lshl_b32 s4, s15, 24
2102; GCN-NEXT:    v_mov_b32_e32 v2, s0
2103; GCN-NEXT:    v_mov_b32_e32 v3, s1
2104; GCN-NEXT:    s_or_b32 s3, s3, s4
2105; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2106; GCN-NEXT:    v_mov_b32_e32 v4, s2
2107; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
2108; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2109; GCN-NEXT:    v_mov_b32_e32 v5, s3
2110; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
2111; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
2112; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
2113; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
2114; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2115; GCN-NEXT:    ; return to shader part epilog
2116  %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
2117  %element = extractelement <16 x i8> %vector, i32 %idx
2118  ret i8 %element
2119}
2120
; Constant-index extract of element 0 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 0 is the low byte of the first loaded dword
; (v0 of v[0:3]): every expected sequence reassembles that dword from its
; bytes and leaves it in v0 with no trailing shift before s_setpc_b64.
; The load differs per target (global_load on GFX9, flat_load on GFX8,
; addr64 buffer_load on GFX7), and only GFX9/GFX8 use SDWA forms.
2121define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) {
2122; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
2123; GFX9:       ; %bb.0:
2124; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2125; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2126; GFX9-NEXT:    s_mov_b32 s4, 8
2127; GFX9-NEXT:    s_movk_i32 s5, 0xff
2128; GFX9-NEXT:    s_waitcnt vmcnt(0)
2129; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2130; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
2131; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2132; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2133; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
2134; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2135; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2136; GFX9-NEXT:    s_setpc_b64 s[30:31]
2137;
2138; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
2139; GFX8:       ; %bb.0:
2140; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2142; GFX8-NEXT:    s_movk_i32 s4, 0xff
2143; GFX8-NEXT:    s_waitcnt vmcnt(0)
2144; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2145; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2146; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
2147; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2148; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
2149; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2150; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2151; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2152; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2153; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2154; GFX8-NEXT:    s_setpc_b64 s[30:31]
2155;
2156; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
2157; GFX7:       ; %bb.0:
2158; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2159; GFX7-NEXT:    s_mov_b32 s6, 0
2160; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2161; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2162; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2163; GFX7-NEXT:    s_movk_i32 s4, 0xff
2164; GFX7-NEXT:    s_waitcnt vmcnt(0)
2165; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2166; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
2167; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2168; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2169; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2170; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2171; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
2172; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2173; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2174; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2175; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2176; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2177; GFX7-NEXT:    s_setpc_b64 s[30:31]
2178  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2179  %element = extractelement <16 x i8> %vector, i32 0
2180  ret i8 %element
2181}
2182
; Constant-index extract of element 1 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 1 sits in the first loaded dword (v0 of
; v[0:3]) at bit offset 8: every expected sequence reassembles that dword
; from its bytes and then ends with a right shift by 8 into v0.
; No final mask to 8 bits appears in the checks.
2183define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) {
2184; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
2185; GFX9:       ; %bb.0:
2186; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2188; GFX9-NEXT:    s_mov_b32 s4, 8
2189; GFX9-NEXT:    s_movk_i32 s5, 0xff
2190; GFX9-NEXT:    s_waitcnt vmcnt(0)
2191; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2192; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
2193; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2194; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2195; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
2196; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2197; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2198; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2199; GFX9-NEXT:    s_setpc_b64 s[30:31]
2200;
2201; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
2202; GFX8:       ; %bb.0:
2203; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2205; GFX8-NEXT:    s_movk_i32 s4, 0xff
2206; GFX8-NEXT:    s_waitcnt vmcnt(0)
2207; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2208; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2209; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
2210; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2211; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
2212; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2213; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2214; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2215; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2216; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2217; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2218; GFX8-NEXT:    s_setpc_b64 s[30:31]
2219;
2220; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
2221; GFX7:       ; %bb.0:
2222; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2223; GFX7-NEXT:    s_mov_b32 s6, 0
2224; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2225; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2226; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2227; GFX7-NEXT:    s_movk_i32 s4, 0xff
2228; GFX7-NEXT:    s_waitcnt vmcnt(0)
2229; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2230; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
2231; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2232; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2233; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2234; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2235; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
2236; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2237; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2238; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2239; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2240; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2241; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2242; GFX7-NEXT:    s_setpc_b64 s[30:31]
2243  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2244  %element = extractelement <16 x i8> %vector, i32 1
2245  ret i8 %element
2246}
2247
; Constant-index extract of element 2 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 2 sits in the first loaded dword (v0 of
; v[0:3]) at bit offset 16: every expected sequence reassembles that dword
; from its bytes and then ends with a right shift by 16 into v0.
; No final mask to 8 bits appears in the checks.
2248define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) {
2249; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
2250; GFX9:       ; %bb.0:
2251; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2252; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2253; GFX9-NEXT:    s_mov_b32 s4, 8
2254; GFX9-NEXT:    s_movk_i32 s5, 0xff
2255; GFX9-NEXT:    s_waitcnt vmcnt(0)
2256; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2257; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
2258; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2259; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2260; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
2261; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2262; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2263; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2264; GFX9-NEXT:    s_setpc_b64 s[30:31]
2265;
2266; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
2267; GFX8:       ; %bb.0:
2268; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2269; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2270; GFX8-NEXT:    s_movk_i32 s4, 0xff
2271; GFX8-NEXT:    s_waitcnt vmcnt(0)
2272; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2273; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2274; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
2275; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2276; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
2277; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2278; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2279; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2280; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2281; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2282; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2283; GFX8-NEXT:    s_setpc_b64 s[30:31]
2284;
2285; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
2286; GFX7:       ; %bb.0:
2287; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2288; GFX7-NEXT:    s_mov_b32 s6, 0
2289; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2290; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2291; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2292; GFX7-NEXT:    s_movk_i32 s4, 0xff
2293; GFX7-NEXT:    s_waitcnt vmcnt(0)
2294; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2295; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
2296; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2297; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2298; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2299; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2300; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
2301; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2302; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2303; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2304; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2305; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2306; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2307; GFX7-NEXT:    s_setpc_b64 s[30:31]
2308  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2309  %element = extractelement <16 x i8> %vector, i32 2
2310  ret i8 %element
2311}
2312
; Constant-index extract of element 3 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 3 is the top byte of the first loaded dword
; (v0 of v[0:3]): every expected sequence reassembles that dword from its
; bytes and then ends with a right shift by 24 into v0.
2313define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) {
2314; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
2315; GFX9:       ; %bb.0:
2316; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2317; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2318; GFX9-NEXT:    s_mov_b32 s4, 8
2319; GFX9-NEXT:    s_movk_i32 s5, 0xff
2320; GFX9-NEXT:    s_waitcnt vmcnt(0)
2321; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2322; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
2323; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2324; GFX9-NEXT:    v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2325; GFX9-NEXT:    v_and_or_b32 v0, v0, s5, v1
2326; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2327; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2328; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2329; GFX9-NEXT:    s_setpc_b64 s[30:31]
2330;
2331; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
2332; GFX8:       ; %bb.0:
2333; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2335; GFX8-NEXT:    s_movk_i32 s4, 0xff
2336; GFX8-NEXT:    s_waitcnt vmcnt(0)
2337; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2338; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2339; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
2340; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2341; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
2342; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2343; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2344; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2345; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2346; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2347; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2348; GFX8-NEXT:    s_setpc_b64 s[30:31]
2349;
2350; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
2351; GFX7:       ; %bb.0:
2352; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353; GFX7-NEXT:    s_mov_b32 s6, 0
2354; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2355; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2356; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2357; GFX7-NEXT:    s_movk_i32 s4, 0xff
2358; GFX7-NEXT:    s_waitcnt vmcnt(0)
2359; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
2360; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
2361; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2362; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2363; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2364; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2365; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
2366; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2367; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2368; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2369; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2370; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2371; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2372; GFX7-NEXT:    s_setpc_b64 s[30:31]
2373  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2374  %element = extractelement <16 x i8> %vector, i32 3
2375  ret i8 %element
2376}
2377
; Constant-index extract of element 4 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 4 is the low byte of the second loaded dword
; (v1 of v[0:3]): every expected sequence reassembles that dword from its
; bytes into v0 with no trailing shift before s_setpc_b64.
2378define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) {
2379; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
2380; GFX9:       ; %bb.0:
2381; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2383; GFX9-NEXT:    s_mov_b32 s4, 8
2384; GFX9-NEXT:    s_movk_i32 s5, 0xff
2385; GFX9-NEXT:    s_waitcnt vmcnt(0)
2386; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2387; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2388; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
2389; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2390; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
2391; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2392; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2393; GFX9-NEXT:    s_setpc_b64 s[30:31]
2394;
2395; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
2396; GFX8:       ; %bb.0:
2397; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2399; GFX8-NEXT:    s_movk_i32 s4, 0xff
2400; GFX8-NEXT:    s_waitcnt vmcnt(0)
2401; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2402; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2403; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
2404; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2405; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
2406; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2407; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2408; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2409; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2410; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2411; GFX8-NEXT:    s_setpc_b64 s[30:31]
2412;
2413; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
2414; GFX7:       ; %bb.0:
2415; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2416; GFX7-NEXT:    s_mov_b32 s6, 0
2417; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2418; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2419; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2420; GFX7-NEXT:    s_movk_i32 s4, 0xff
2421; GFX7-NEXT:    s_waitcnt vmcnt(0)
2422; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2423; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
2424; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2425; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2426; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
2427; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2428; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2429; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
2430; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2431; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2432; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2433; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2434; GFX7-NEXT:    s_setpc_b64 s[30:31]
2435  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2436  %element = extractelement <16 x i8> %vector, i32 4
2437  ret i8 %element
2438}
2439
; Constant-index extract of element 5 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 5 sits in the second loaded dword (v1 of
; v[0:3]) at bit offset 8: every expected sequence reassembles that dword
; from its bytes into v0 and then ends with a right shift by 8.
; No final mask to 8 bits appears in the checks.
2440define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) {
2441; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
2442; GFX9:       ; %bb.0:
2443; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2444; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2445; GFX9-NEXT:    s_mov_b32 s4, 8
2446; GFX9-NEXT:    s_movk_i32 s5, 0xff
2447; GFX9-NEXT:    s_waitcnt vmcnt(0)
2448; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2449; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2450; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
2451; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2452; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
2453; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2454; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2455; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2456; GFX9-NEXT:    s_setpc_b64 s[30:31]
2457;
2458; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
2459; GFX8:       ; %bb.0:
2460; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2461; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2462; GFX8-NEXT:    s_movk_i32 s4, 0xff
2463; GFX8-NEXT:    s_waitcnt vmcnt(0)
2464; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2465; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2466; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
2467; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2468; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
2469; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2470; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2471; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2472; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2473; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2474; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2475; GFX8-NEXT:    s_setpc_b64 s[30:31]
2476;
2477; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
2478; GFX7:       ; %bb.0:
2479; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2480; GFX7-NEXT:    s_mov_b32 s6, 0
2481; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2482; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2483; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2484; GFX7-NEXT:    s_movk_i32 s4, 0xff
2485; GFX7-NEXT:    s_waitcnt vmcnt(0)
2486; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2487; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
2488; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2489; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2490; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
2491; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2492; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2493; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
2494; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2495; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2496; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2497; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2498; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2499; GFX7-NEXT:    s_setpc_b64 s[30:31]
2500  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2501  %element = extractelement <16 x i8> %vector, i32 5
2502  ret i8 %element
2503}
2504
; Constant-index extract of element 6 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 6 sits in the second loaded dword (v1 of
; v[0:3]) at bit offset 16: every expected sequence reassembles that dword
; from its bytes into v0 and then ends with a right shift by 16.
; No final mask to 8 bits appears in the checks.
2505define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) {
2506; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
2507; GFX9:       ; %bb.0:
2508; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2509; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2510; GFX9-NEXT:    s_mov_b32 s4, 8
2511; GFX9-NEXT:    s_movk_i32 s5, 0xff
2512; GFX9-NEXT:    s_waitcnt vmcnt(0)
2513; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2514; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2515; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
2516; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2517; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
2518; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2519; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2520; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2521; GFX9-NEXT:    s_setpc_b64 s[30:31]
2522;
2523; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
2524; GFX8:       ; %bb.0:
2525; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2526; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2527; GFX8-NEXT:    s_movk_i32 s4, 0xff
2528; GFX8-NEXT:    s_waitcnt vmcnt(0)
2529; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2530; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2531; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
2532; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2533; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
2534; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2535; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2536; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2537; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2538; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2539; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2540; GFX8-NEXT:    s_setpc_b64 s[30:31]
2541;
2542; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
2543; GFX7:       ; %bb.0:
2544; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2545; GFX7-NEXT:    s_mov_b32 s6, 0
2546; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2547; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2548; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2549; GFX7-NEXT:    s_movk_i32 s4, 0xff
2550; GFX7-NEXT:    s_waitcnt vmcnt(0)
2551; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2552; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
2553; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2554; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2555; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
2556; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2557; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2558; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
2559; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2560; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2561; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2562; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2563; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2564; GFX7-NEXT:    s_setpc_b64 s[30:31]
2565  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2566  %element = extractelement <16 x i8> %vector, i32 6
2567  ret i8 %element
2568}
2569
; Constant-index extract of element 7 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 7 is the top byte of the second loaded dword
; (v1 of v[0:3]): every expected sequence reassembles that dword from its
; bytes into v0 and then ends with a right shift by 24.
2570define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) {
2571; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
2572; GFX9:       ; %bb.0:
2573; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2574; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2575; GFX9-NEXT:    s_mov_b32 s4, 8
2576; GFX9-NEXT:    s_movk_i32 s5, 0xff
2577; GFX9-NEXT:    s_waitcnt vmcnt(0)
2578; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2579; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2580; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
2581; GFX9-NEXT:    v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2582; GFX9-NEXT:    v_and_or_b32 v0, v1, s5, v0
2583; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2584; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2585; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2586; GFX9-NEXT:    s_setpc_b64 s[30:31]
2587;
2588; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
2589; GFX8:       ; %bb.0:
2590; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2591; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2592; GFX8-NEXT:    s_movk_i32 s4, 0xff
2593; GFX8-NEXT:    s_waitcnt vmcnt(0)
2594; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2595; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2596; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
2597; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2598; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
2599; GFX8-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2600; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2601; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2602; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2603; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2604; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2605; GFX8-NEXT:    s_setpc_b64 s[30:31]
2606;
2607; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
2608; GFX7:       ; %bb.0:
2609; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2610; GFX7-NEXT:    s_mov_b32 s6, 0
2611; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2612; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2613; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2614; GFX7-NEXT:    s_movk_i32 s4, 0xff
2615; GFX7-NEXT:    s_waitcnt vmcnt(0)
2616; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
2617; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
2618; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2619; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2620; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
2621; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2622; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2623; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
2624; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2625; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
2626; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2627; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2628; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2629; GFX7-NEXT:    s_setpc_b64 s[30:31]
2630  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2631  %element = extractelement <16 x i8> %vector, i32 7
2632  ret i8 %element
2633}
2634
; Constant-index extract of element 8 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 8 is the low byte of the third loaded dword
; (v2 of v[0:3]): every expected sequence reassembles that dword from its
; bytes into v0 with no trailing shift before s_setpc_b64.
2635define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) {
2636; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
2637; GFX9:       ; %bb.0:
2638; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2639; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2640; GFX9-NEXT:    s_mov_b32 s4, 8
2641; GFX9-NEXT:    s_movk_i32 s5, 0xff
2642; GFX9-NEXT:    s_waitcnt vmcnt(0)
2643; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2644; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v2
2645; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2646; GFX9-NEXT:    v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2647; GFX9-NEXT:    v_and_or_b32 v0, v2, s5, v0
2648; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2649; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2650; GFX9-NEXT:    s_setpc_b64 s[30:31]
2651;
2652; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
2653; GFX8:       ; %bb.0:
2654; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2655; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2656; GFX8-NEXT:    s_movk_i32 s4, 0xff
2657; GFX8-NEXT:    s_waitcnt vmcnt(0)
2658; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2659; GFX8-NEXT:    v_mov_b32_e32 v1, s4
2660; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
2661; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2662; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
2663; GFX8-NEXT:    v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2664; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2665; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2666; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2667; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2668; GFX8-NEXT:    s_setpc_b64 s[30:31]
2669;
2670; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
2671; GFX7:       ; %bb.0:
2672; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2673; GFX7-NEXT:    s_mov_b32 s6, 0
2674; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2675; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2676; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2677; GFX7-NEXT:    s_movk_i32 s4, 0xff
2678; GFX7-NEXT:    s_waitcnt vmcnt(0)
2679; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2680; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
2681; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2682; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2683; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
2684; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2685; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2686; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2687; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2688; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2689; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2690; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2691; GFX7-NEXT:    s_setpc_b64 s[30:31]
2692  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2693  %element = extractelement <16 x i8> %vector, i32 8
2694  ret i8 %element
2695}
2696
; Constant-index extract of element 9 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected output for each checked target, the
; bytes of v2 are split out and recombined into one dword in v0, which is then
; shifted right by 8 just before the return.
2697define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) {
2698; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
2699; GFX9:       ; %bb.0:
2700; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2701; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2702; GFX9-NEXT:    s_mov_b32 s4, 8
2703; GFX9-NEXT:    s_movk_i32 s5, 0xff
2704; GFX9-NEXT:    s_waitcnt vmcnt(0)
2705; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2706; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v2
2707; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2708; GFX9-NEXT:    v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2709; GFX9-NEXT:    v_and_or_b32 v0, v2, s5, v0
2710; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2711; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2712; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2713; GFX9-NEXT:    s_setpc_b64 s[30:31]
2714;
2715; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
2716; GFX8:       ; %bb.0:
2717; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2718; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2719; GFX8-NEXT:    s_movk_i32 s4, 0xff
2720; GFX8-NEXT:    s_waitcnt vmcnt(0)
2721; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2722; GFX8-NEXT:    v_mov_b32_e32 v1, s4
2723; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
2724; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2725; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
2726; GFX8-NEXT:    v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2727; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2728; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2729; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2730; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2731; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2732; GFX8-NEXT:    s_setpc_b64 s[30:31]
2733;
2734; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
2735; GFX7:       ; %bb.0:
2736; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2737; GFX7-NEXT:    s_mov_b32 s6, 0
2738; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2739; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2740; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2741; GFX7-NEXT:    s_movk_i32 s4, 0xff
2742; GFX7-NEXT:    s_waitcnt vmcnt(0)
2743; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2744; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
2745; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2746; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2747; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
2748; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2749; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2750; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2751; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2752; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2753; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2754; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2755; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2756; GFX7-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: load the full vector, extract lane 9 with an immediate
  ; index, and return that byte.
2757  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2758  %element = extractelement <16 x i8> %vector, i32 9
2759  ret i8 %element
2760}
2761
; Constant-index extract of element 10 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected output for each checked target, the
; bytes of v2 are split out and recombined into one dword in v0, which is then
; shifted right by 16 just before the return.
2762define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) {
2763; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
2764; GFX9:       ; %bb.0:
2765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2766; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2767; GFX9-NEXT:    s_mov_b32 s4, 8
2768; GFX9-NEXT:    s_movk_i32 s5, 0xff
2769; GFX9-NEXT:    s_waitcnt vmcnt(0)
2770; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2771; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v2
2772; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2773; GFX9-NEXT:    v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2774; GFX9-NEXT:    v_and_or_b32 v0, v2, s5, v0
2775; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2776; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2777; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2778; GFX9-NEXT:    s_setpc_b64 s[30:31]
2779;
2780; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
2781; GFX8:       ; %bb.0:
2782; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2783; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2784; GFX8-NEXT:    s_movk_i32 s4, 0xff
2785; GFX8-NEXT:    s_waitcnt vmcnt(0)
2786; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2787; GFX8-NEXT:    v_mov_b32_e32 v1, s4
2788; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
2789; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2790; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
2791; GFX8-NEXT:    v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2792; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2793; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2794; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2795; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2796; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2797; GFX8-NEXT:    s_setpc_b64 s[30:31]
2798;
2799; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
2800; GFX7:       ; %bb.0:
2801; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2802; GFX7-NEXT:    s_mov_b32 s6, 0
2803; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2804; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2805; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2806; GFX7-NEXT:    s_movk_i32 s4, 0xff
2807; GFX7-NEXT:    s_waitcnt vmcnt(0)
2808; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2809; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
2810; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2811; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2812; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
2813; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2814; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2815; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2816; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2817; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2818; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2819; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2820; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2821; GFX7-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: load the full vector, extract lane 10 with an immediate
  ; index, and return that byte.
2822  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2823  %element = extractelement <16 x i8> %vector, i32 10
2824  ret i8 %element
2825}
2826
; Constant-index extract of element 11 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected output for each checked target, the
; bytes of v2 are split out and recombined into one dword in v0, which is then
; shifted right by 24 just before the return.
2827define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) {
2828; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
2829; GFX9:       ; %bb.0:
2830; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2831; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2832; GFX9-NEXT:    s_mov_b32 s4, 8
2833; GFX9-NEXT:    s_movk_i32 s5, 0xff
2834; GFX9-NEXT:    s_waitcnt vmcnt(0)
2835; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2836; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v2
2837; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2838; GFX9-NEXT:    v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2839; GFX9-NEXT:    v_and_or_b32 v0, v2, s5, v0
2840; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2841; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2842; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2843; GFX9-NEXT:    s_setpc_b64 s[30:31]
2844;
2845; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
2846; GFX8:       ; %bb.0:
2847; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2848; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2849; GFX8-NEXT:    s_movk_i32 s4, 0xff
2850; GFX8-NEXT:    s_waitcnt vmcnt(0)
2851; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2852; GFX8-NEXT:    v_mov_b32_e32 v1, s4
2853; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
2854; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2855; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
2856; GFX8-NEXT:    v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2857; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2858; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2859; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2860; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2861; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2862; GFX8-NEXT:    s_setpc_b64 s[30:31]
2863;
2864; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
2865; GFX7:       ; %bb.0:
2866; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2867; GFX7-NEXT:    s_mov_b32 s6, 0
2868; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2869; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2870; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2871; GFX7-NEXT:    s_movk_i32 s4, 0xff
2872; GFX7-NEXT:    s_waitcnt vmcnt(0)
2873; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
2874; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
2875; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2876; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2877; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
2878; GFX7-NEXT:    v_and_b32_e32 v2, s4, v2
2879; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2880; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2881; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2882; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2883; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2884; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2885; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2886; GFX7-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: load the full vector, extract lane 11 with an immediate
  ; index, and return that byte.
2887  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2888  %element = extractelement <16 x i8> %vector, i32 11
2889  ret i8 %element
2890}
2891
; Constant-index extract of element 12 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected output for each checked target, the
; bytes of v3 are split out and recombined into one dword in v0; no trailing
; right shift follows before the return.
2892define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) {
2893; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
2894; GFX9:       ; %bb.0:
2895; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2896; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2897; GFX9-NEXT:    s_mov_b32 s4, 8
2898; GFX9-NEXT:    s_movk_i32 s5, 0xff
2899; GFX9-NEXT:    s_waitcnt vmcnt(0)
2900; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
2901; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
2902; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2903; GFX9-NEXT:    v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2904; GFX9-NEXT:    v_and_or_b32 v0, v3, s5, v0
2905; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2906; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
2907; GFX9-NEXT:    s_setpc_b64 s[30:31]
2908;
2909; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
2910; GFX8:       ; %bb.0:
2911; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2912; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2913; GFX8-NEXT:    s_movk_i32 s4, 0xff
2914; GFX8-NEXT:    s_waitcnt vmcnt(0)
2915; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2916; GFX8-NEXT:    v_mov_b32_e32 v1, s4
2917; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 8, v3
2918; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2919; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
2920; GFX8-NEXT:    v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2921; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2922; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2923; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2924; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2925; GFX8-NEXT:    s_setpc_b64 s[30:31]
2926;
2927; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
2928; GFX7:       ; %bb.0:
2929; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2930; GFX7-NEXT:    s_mov_b32 s6, 0
2931; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2932; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2933; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2934; GFX7-NEXT:    s_movk_i32 s4, 0xff
2935; GFX7-NEXT:    s_waitcnt vmcnt(0)
2936; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
2937; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
2938; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
2939; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
2940; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
2941; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
2942; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
2943; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2944; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
2945; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2946; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2947; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2948; GFX7-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: load the full vector, extract lane 12 with an immediate
  ; index, and return that byte.
2949  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2950  %element = extractelement <16 x i8> %vector, i32 12
2951  ret i8 %element
2952}
2953
; Constant-index extract of element 13 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected output for each checked target, the
; bytes of v3 are split out and recombined into one dword in v0, which is then
; shifted right by 8 just before the return.
2954define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) {
2955; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
2956; GFX9:       ; %bb.0:
2957; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2958; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2959; GFX9-NEXT:    s_mov_b32 s4, 8
2960; GFX9-NEXT:    s_movk_i32 s5, 0xff
2961; GFX9-NEXT:    s_waitcnt vmcnt(0)
2962; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
2963; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
2964; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2965; GFX9-NEXT:    v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2966; GFX9-NEXT:    v_and_or_b32 v0, v3, s5, v0
2967; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2968; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
2969; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2970; GFX9-NEXT:    s_setpc_b64 s[30:31]
2971;
2972; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
2973; GFX8:       ; %bb.0:
2974; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2975; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2976; GFX8-NEXT:    s_movk_i32 s4, 0xff
2977; GFX8-NEXT:    s_waitcnt vmcnt(0)
2978; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2979; GFX8-NEXT:    v_mov_b32_e32 v1, s4
2980; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 8, v3
2981; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
2982; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
2983; GFX8-NEXT:    v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2984; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2985; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2986; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2987; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2988; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2989; GFX8-NEXT:    s_setpc_b64 s[30:31]
2990;
2991; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
2992; GFX7:       ; %bb.0:
2993; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2994; GFX7-NEXT:    s_mov_b32 s6, 0
2995; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2996; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2997; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2998; GFX7-NEXT:    s_movk_i32 s4, 0xff
2999; GFX7-NEXT:    s_waitcnt vmcnt(0)
3000; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
3001; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
3002; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
3003; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
3004; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3005; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
3006; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
3007; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3008; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
3009; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3010; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3011; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3012; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3013; GFX7-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: load the full vector, extract lane 13 with an immediate
  ; index, and return that byte.
3014  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3015  %element = extractelement <16 x i8> %vector, i32 13
3016  ret i8 %element
3017}
3018
; Constant-index extract of element 14 from a <16 x i8> loaded through an
; addrspace(1) pointer. In the expected output for each checked target, the
; bytes of v3 are split out and recombined into one dword in v0, which is then
; shifted right by 16 just before the return.
3019define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) {
3020; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
3021; GFX9:       ; %bb.0:
3022; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3023; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3024; GFX9-NEXT:    s_mov_b32 s4, 8
3025; GFX9-NEXT:    s_movk_i32 s5, 0xff
3026; GFX9-NEXT:    s_waitcnt vmcnt(0)
3027; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
3028; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
3029; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
3030; GFX9-NEXT:    v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3031; GFX9-NEXT:    v_and_or_b32 v0, v3, s5, v0
3032; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3033; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
3034; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3035; GFX9-NEXT:    s_setpc_b64 s[30:31]
3036;
3037; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
3038; GFX8:       ; %bb.0:
3039; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3040; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3041; GFX8-NEXT:    s_movk_i32 s4, 0xff
3042; GFX8-NEXT:    s_waitcnt vmcnt(0)
3043; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3044; GFX8-NEXT:    v_mov_b32_e32 v1, s4
3045; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 8, v3
3046; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
3047; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
3048; GFX8-NEXT:    v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3049; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3050; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3051; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3052; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3053; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3054; GFX8-NEXT:    s_setpc_b64 s[30:31]
3055;
3056; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
3057; GFX7:       ; %bb.0:
3058; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3059; GFX7-NEXT:    s_mov_b32 s6, 0
3060; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3061; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3062; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3063; GFX7-NEXT:    s_movk_i32 s4, 0xff
3064; GFX7-NEXT:    s_waitcnt vmcnt(0)
3065; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
3066; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
3067; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
3068; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
3069; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3070; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
3071; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
3072; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3073; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
3074; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3075; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3076; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3077; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3078; GFX7-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: load the full vector, extract lane 14 with an immediate
  ; index, and return that byte.
3079  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3080  %element = extractelement <16 x i8> %vector, i32 14
3081  ret i8 %element
3082}
3083
; Constant-index extract of element 15 (the last lane) from a <16 x i8> loaded
; through an addrspace(1) pointer. In the expected output for each checked
; target, the bytes of v3 are split out and recombined into one dword in v0,
; which is then shifted right by 24 just before the return.
3084define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) {
3085; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
3086; GFX9:       ; %bb.0:
3087; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3088; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3089; GFX9-NEXT:    s_mov_b32 s4, 8
3090; GFX9-NEXT:    s_movk_i32 s5, 0xff
3091; GFX9-NEXT:    s_waitcnt vmcnt(0)
3092; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
3093; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
3094; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
3095; GFX9-NEXT:    v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3096; GFX9-NEXT:    v_and_or_b32 v0, v3, s5, v0
3097; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3098; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
3099; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3100; GFX9-NEXT:    s_setpc_b64 s[30:31]
3101;
3102; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
3103; GFX8:       ; %bb.0:
3104; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3105; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3106; GFX8-NEXT:    s_movk_i32 s4, 0xff
3107; GFX8-NEXT:    s_waitcnt vmcnt(0)
3108; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3109; GFX8-NEXT:    v_mov_b32_e32 v1, s4
3110; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 8, v3
3111; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
3112; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
3113; GFX8-NEXT:    v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3114; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3115; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3116; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3117; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3118; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3119; GFX8-NEXT:    s_setpc_b64 s[30:31]
3120;
3121; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
3122; GFX7:       ; %bb.0:
3123; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3124; GFX7-NEXT:    s_mov_b32 s6, 0
3125; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3126; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3127; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3128; GFX7-NEXT:    s_movk_i32 s4, 0xff
3129; GFX7-NEXT:    s_waitcnt vmcnt(0)
3130; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
3131; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
3132; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
3133; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
3134; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3135; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
3136; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
3137; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3138; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
3139; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3140; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3141; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3142; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3143; GFX7-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: load the full vector, extract lane 15 with an immediate
  ; index, and return that byte.
3144  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3145  %element = extractelement <16 x i8> %vector, i32 15
3146  ret i8 %element
3147}
3148