1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6
; Dynamic-index extractelement on a <4 x i8> loaded through an inreg
; addrspace(4) pointer, with an inreg i32 index, in an amdgpu_ps function.
; The expected code loads one dword with s_load_dword, reassembles the four
; bytes, masks the index with 3, scales it by 8 (shift left by 3) and shifts
; the packed value right by that amount, all with scalar instructions.
; gfx900, fiji and hawaii share one block of checks; gfx1010 has its own block.
7define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
8; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
11; GCN-NEXT:    s_waitcnt lgkmcnt(0)
12; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
13; GCN-NEXT:    s_lshr_b32 s1, s0, 24
14; GCN-NEXT:    s_and_b32 s2, s0, 0xff
15; GCN-NEXT:    s_lshl_b32 s3, s3, 8
16; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
17; GCN-NEXT:    s_or_b32 s2, s2, s3
18; GCN-NEXT:    s_lshl_b32 s0, s0, 16
19; GCN-NEXT:    s_or_b32 s0, s2, s0
20; GCN-NEXT:    s_lshl_b32 s1, s1, 24
21; GCN-NEXT:    s_or_b32 s0, s0, s1
22; GCN-NEXT:    s_and_b32 s1, s4, 3
23; GCN-NEXT:    s_lshl_b32 s1, s1, 3
24; GCN-NEXT:    s_lshr_b32 s0, s0, s1
25; GCN-NEXT:    ; return to shader part epilog
26;
27; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
28; GFX10:       ; %bb.0:
29; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
30; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
31; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80008
32; GFX10-NEXT:    s_lshr_b32 s1, s0, 24
33; GFX10-NEXT:    s_and_b32 s2, s0, 0xff
34; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
35; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
36; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
37; GFX10-NEXT:    s_or_b32 s2, s2, s3
38; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
39; GFX10-NEXT:    s_or_b32 s0, s2, s0
40; GFX10-NEXT:    s_and_b32 s2, s4, 3
41; GFX10-NEXT:    s_or_b32 s0, s0, s1
42; GFX10-NEXT:    s_lshl_b32 s1, s2, 3
43; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
44; GFX10-NEXT:    ; return to shader part epilog
45  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
46  %element = extractelement <4 x i8> %vector, i32 %idx
47  ret i8 %element
48}
49
; Dynamic-index extractelement on a <4 x i8> loaded through a non-inreg
; addrspace(1) pointer, with an inreg i32 index, in an amdgpu_ps function.
; In every expected sequence the index is masked and scaled with scalar
; instructions (s_and_b32 / s_lshl_b32), the shift itself is a v_lshrrev_b32,
; and the result is copied to s0 with v_readfirstlane_b32.
; Each of the four targets has its own checks; the load is global_load_dword
; on gfx900 and gfx1010, flat_load_dword on fiji, and an addr64
; buffer_load_dword on hawaii.
50define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
51; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
52; GFX9:       ; %bb.0:
53; GFX9-NEXT:    global_load_dword v0, v[0:1], off
54; GFX9-NEXT:    v_mov_b32_e32 v2, 8
55; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
56; GFX9-NEXT:    v_mov_b32_e32 v3, 16
57; GFX9-NEXT:    s_and_b32 s0, s2, 3
58; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
59; GFX9-NEXT:    s_waitcnt vmcnt(0)
60; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
61; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
62; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
63; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
64; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
65; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
66; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
67; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
68; GFX9-NEXT:    ; return to shader part epilog
69;
70; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
71; GFX8:       ; %bb.0:
72; GFX8-NEXT:    flat_load_dword v0, v[0:1]
73; GFX8-NEXT:    v_mov_b32_e32 v1, 8
74; GFX8-NEXT:    v_mov_b32_e32 v2, 16
75; GFX8-NEXT:    s_and_b32 s0, s2, 3
76; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
77; GFX8-NEXT:    s_waitcnt vmcnt(0)
78; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
79; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
80; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
81; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
82; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
83; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
84; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
85; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
86; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
87; GFX8-NEXT:    ; return to shader part epilog
88;
89; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
90; GFX7:       ; %bb.0:
91; GFX7-NEXT:    s_mov_b32 s6, 0
92; GFX7-NEXT:    s_mov_b32 s7, 0xf000
93; GFX7-NEXT:    s_mov_b64 s[4:5], 0
94; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
95; GFX7-NEXT:    s_and_b32 s0, s2, 3
96; GFX7-NEXT:    s_lshl_b32 s0, s0, 3
97; GFX7-NEXT:    s_waitcnt vmcnt(0)
98; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
99; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
100; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
101; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
102; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
103; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
104; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
105; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
106; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
107; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
108; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
109; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
110; GFX7-NEXT:    ; return to shader part epilog
111;
112; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
113; GFX10:       ; %bb.0:
114; GFX10-NEXT:    global_load_dword v0, v[0:1], off
115; GFX10-NEXT:    v_mov_b32_e32 v1, 8
116; GFX10-NEXT:    v_mov_b32_e32 v2, 16
117; GFX10-NEXT:    s_and_b32 s0, s2, 3
118; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
119; GFX10-NEXT:    s_waitcnt vmcnt(0)
120; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
121; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
122; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
123; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
124; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
125; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
126; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
127; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
128; GFX10-NEXT:    ; return to shader part epilog
129  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
130  %element = extractelement <4 x i8> %vector, i32 %idx
131  ret i8 %element
132}
133
; Dynamic-index extractelement on a <4 x i8> loaded through an addrspace(1)
; pointer, in a function with no explicit calling convention and no inreg
; arguments.
; Every expected sequence starts with a full s_waitcnt and returns through
; s_setpc_b64 s[30:31]; the index is masked with 3 and scaled by 8 using
; vector instructions (v_and_b32 / v_lshlrev_b32) before the final
; v_lshrrev_b32. The gfx1010 checks additionally expect
; s_waitcnt_vscnt null, 0x0 on entry.
134define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) {
135; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
136; GFX9:       ; %bb.0:
137; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138; GFX9-NEXT:    global_load_dword v0, v[0:1], off
139; GFX9-NEXT:    v_mov_b32_e32 v3, 8
140; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
141; GFX9-NEXT:    v_mov_b32_e32 v4, 16
142; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
143; GFX9-NEXT:    s_waitcnt vmcnt(0)
144; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
145; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
146; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
147; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v3
148; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v5
149; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
150; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
151; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
152; GFX9-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
155; GFX8:       ; %bb.0:
156; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX8-NEXT:    flat_load_dword v0, v[0:1]
158; GFX8-NEXT:    v_mov_b32_e32 v1, 8
159; GFX8-NEXT:    v_mov_b32_e32 v3, 16
160; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
161; GFX8-NEXT:    s_waitcnt vmcnt(0)
162; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
163; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
164; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
165; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
166; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
167; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
168; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
169; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
170; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
171; GFX8-NEXT:    s_setpc_b64 s[30:31]
172;
173; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
174; GFX7:       ; %bb.0:
175; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX7-NEXT:    s_mov_b32 s6, 0
177; GFX7-NEXT:    s_mov_b32 s7, 0xf000
178; GFX7-NEXT:    s_mov_b64 s[4:5], 0
179; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
180; GFX7-NEXT:    v_and_b32_e32 v1, 3, v2
181; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
182; GFX7-NEXT:    s_waitcnt vmcnt(0)
183; GFX7-NEXT:    v_bfe_u32 v4, v0, 8, 8
184; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
185; GFX7-NEXT:    v_and_b32_e32 v3, 0xff, v0
186; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
187; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
188; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
189; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
190; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
191; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
192; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
193; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
194; GFX7-NEXT:    s_setpc_b64 s[30:31]
195;
196; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
197; GFX10:       ; %bb.0:
198; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
200; GFX10-NEXT:    global_load_dword v0, v[0:1], off
201; GFX10-NEXT:    v_mov_b32_e32 v1, 8
202; GFX10-NEXT:    v_mov_b32_e32 v3, 16
203; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
204; GFX10-NEXT:    s_waitcnt vmcnt(0)
205; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
206; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
207; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
208; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
209; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
210; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
211; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
212; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
213; GFX10-NEXT:    s_setpc_b64 s[30:31]
214  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
215  %element = extractelement <4 x i8> %vector, i32 %idx
216  ret i8 %element
217}
218
; Dynamic-index extractelement on a <4 x i8> loaded through an inreg
; addrspace(4) pointer, with a non-inreg i32 index, in an amdgpu_ps function.
; The expected code loads the vector with s_load_dword and reassembles it with
; scalar instructions, while the index is masked and scaled with vector
; instructions; the final shift is v_lshrrev_b32_e64 on gfx900, fiji and
; gfx1010 and v_lshr_b32_e32 on hawaii, followed by v_readfirstlane_b32 to
; place the result in s0.
219define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
220; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
221; GFX9:       ; %bb.0:
222; GFX9-NEXT:    s_load_dword s0, s[2:3], 0x0
223; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
224; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
225; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
226; GFX9-NEXT:    s_bfe_u32 s3, s0, 0x80008
227; GFX9-NEXT:    s_lshr_b32 s1, s0, 24
228; GFX9-NEXT:    s_and_b32 s2, s0, 0xff
229; GFX9-NEXT:    s_lshl_b32 s3, s3, 8
230; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x80010
231; GFX9-NEXT:    s_or_b32 s2, s2, s3
232; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
233; GFX9-NEXT:    s_or_b32 s0, s2, s0
234; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
235; GFX9-NEXT:    s_or_b32 s0, s0, s1
236; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
237; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
238; GFX9-NEXT:    ; return to shader part epilog
239;
240; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
241; GFX8:       ; %bb.0:
242; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x0
243; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
244; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
245; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
246; GFX8-NEXT:    s_bfe_u32 s3, s0, 0x80008
247; GFX8-NEXT:    s_lshr_b32 s1, s0, 24
248; GFX8-NEXT:    s_and_b32 s2, s0, 0xff
249; GFX8-NEXT:    s_lshl_b32 s3, s3, 8
250; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x80010
251; GFX8-NEXT:    s_or_b32 s2, s2, s3
252; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
253; GFX8-NEXT:    s_or_b32 s0, s2, s0
254; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
255; GFX8-NEXT:    s_or_b32 s0, s0, s1
256; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
257; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
258; GFX8-NEXT:    ; return to shader part epilog
259;
260; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
261; GFX7:       ; %bb.0:
262; GFX7-NEXT:    s_load_dword s0, s[2:3], 0x0
263; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
264; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
265; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
266; GFX7-NEXT:    s_bfe_u32 s3, s0, 0x80008
267; GFX7-NEXT:    s_lshr_b32 s1, s0, 24
268; GFX7-NEXT:    s_and_b32 s2, s0, 0xff
269; GFX7-NEXT:    s_lshl_b32 s3, s3, 8
270; GFX7-NEXT:    s_bfe_u32 s0, s0, 0x80010
271; GFX7-NEXT:    s_or_b32 s2, s2, s3
272; GFX7-NEXT:    s_lshl_b32 s0, s0, 16
273; GFX7-NEXT:    s_or_b32 s0, s2, s0
274; GFX7-NEXT:    s_lshl_b32 s1, s1, 24
275; GFX7-NEXT:    s_or_b32 s0, s0, s1
276; GFX7-NEXT:    v_lshr_b32_e32 v0, s0, v0
277; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
278; GFX7-NEXT:    ; return to shader part epilog
279;
280; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
281; GFX10:       ; %bb.0:
282; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
283; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
284; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
285; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
286; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
287; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
288; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
289; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
290; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
291; GFX10-NEXT:    s_or_b32 s1, s1, s2
292; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
293; GFX10-NEXT:    s_or_b32 s1, s1, s3
294; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
295; GFX10-NEXT:    s_or_b32 s0, s1, s0
296; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
297; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
298; GFX10-NEXT:    ; return to shader part epilog
299  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
300  %element = extractelement <4 x i8> %vector, i32 %idx
301  ret i8 %element
302}
303
; Constant-index (element 0) extractelement on a <4 x i8> loaded through an
; inreg addrspace(4) pointer, in an amdgpu_ps function.
; The expected code still reassembles all four bytes of the loaded dword with
; scalar instructions; no trailing shift is expected for index 0.
; gfx900, fiji and hawaii share one block of checks; gfx1010 has its own block.
304define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) {
305; GCN-LABEL: extractelement_sgpr_v4i8_idx0:
306; GCN:       ; %bb.0:
307; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
308; GCN-NEXT:    s_waitcnt lgkmcnt(0)
309; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
310; GCN-NEXT:    s_lshr_b32 s1, s0, 24
311; GCN-NEXT:    s_and_b32 s2, s0, 0xff
312; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
313; GCN-NEXT:    s_lshl_b32 s3, s3, 8
314; GCN-NEXT:    s_or_b32 s2, s2, s3
315; GCN-NEXT:    s_lshl_b32 s0, s0, 16
316; GCN-NEXT:    s_or_b32 s0, s2, s0
317; GCN-NEXT:    s_lshl_b32 s1, s1, 24
318; GCN-NEXT:    s_or_b32 s0, s0, s1
319; GCN-NEXT:    ; return to shader part epilog
320;
321; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
322; GFX10:       ; %bb.0:
323; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
324; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
325; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
326; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
327; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
328; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
329; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
330; GFX10-NEXT:    s_or_b32 s1, s1, s2
331; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
332; GFX10-NEXT:    s_or_b32 s1, s1, s3
333; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
334; GFX10-NEXT:    s_or_b32 s0, s1, s0
335; GFX10-NEXT:    ; return to shader part epilog
336  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
337  %element = extractelement <4 x i8> %vector, i32 0
338  ret i8 %element
339}
340
; Constant-index (element 1) extractelement on a <4 x i8> loaded through an
; inreg addrspace(4) pointer, in an amdgpu_ps function.
; The expected code reassembles the loaded dword with scalar instructions and
; then shifts it right by 8 (s_lshr_b32 s0, s0, 8).
; gfx900, fiji and hawaii share one block of checks; gfx1010 has its own block.
341define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) {
342; GCN-LABEL: extractelement_sgpr_v4i8_idx1:
343; GCN:       ; %bb.0:
344; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
345; GCN-NEXT:    s_waitcnt lgkmcnt(0)
346; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
347; GCN-NEXT:    s_lshr_b32 s1, s0, 24
348; GCN-NEXT:    s_and_b32 s2, s0, 0xff
349; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
350; GCN-NEXT:    s_lshl_b32 s3, s3, 8
351; GCN-NEXT:    s_or_b32 s2, s2, s3
352; GCN-NEXT:    s_lshl_b32 s0, s0, 16
353; GCN-NEXT:    s_or_b32 s0, s2, s0
354; GCN-NEXT:    s_lshl_b32 s1, s1, 24
355; GCN-NEXT:    s_or_b32 s0, s0, s1
356; GCN-NEXT:    s_lshr_b32 s0, s0, 8
357; GCN-NEXT:    ; return to shader part epilog
358;
359; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
360; GFX10:       ; %bb.0:
361; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
362; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
363; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
364; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
365; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
366; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
367; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
368; GFX10-NEXT:    s_or_b32 s1, s1, s2
369; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
370; GFX10-NEXT:    s_or_b32 s1, s1, s3
371; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
372; GFX10-NEXT:    s_or_b32 s0, s1, s0
373; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
374; GFX10-NEXT:    ; return to shader part epilog
375  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
376  %element = extractelement <4 x i8> %vector, i32 1
377  ret i8 %element
378}
379
; Constant-index (element 2) extractelement on a <4 x i8> loaded through an
; inreg addrspace(4) pointer, in an amdgpu_ps function.
; The expected code reassembles the loaded dword with scalar instructions and
; then shifts it right by 16 (s_lshr_b32 s0, s0, 16).
; gfx900, fiji and hawaii share one block of checks; gfx1010 has its own block.
380define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) {
381; GCN-LABEL: extractelement_sgpr_v4i8_idx2:
382; GCN:       ; %bb.0:
383; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
384; GCN-NEXT:    s_waitcnt lgkmcnt(0)
385; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
386; GCN-NEXT:    s_lshr_b32 s1, s0, 24
387; GCN-NEXT:    s_and_b32 s2, s0, 0xff
388; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
389; GCN-NEXT:    s_lshl_b32 s3, s3, 8
390; GCN-NEXT:    s_or_b32 s2, s2, s3
391; GCN-NEXT:    s_lshl_b32 s0, s0, 16
392; GCN-NEXT:    s_or_b32 s0, s2, s0
393; GCN-NEXT:    s_lshl_b32 s1, s1, 24
394; GCN-NEXT:    s_or_b32 s0, s0, s1
395; GCN-NEXT:    s_lshr_b32 s0, s0, 16
396; GCN-NEXT:    ; return to shader part epilog
397;
398; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
399; GFX10:       ; %bb.0:
400; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
401; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
402; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
403; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
404; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
405; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
406; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
407; GFX10-NEXT:    s_or_b32 s1, s1, s2
408; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
409; GFX10-NEXT:    s_or_b32 s1, s1, s3
410; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
411; GFX10-NEXT:    s_or_b32 s0, s1, s0
412; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
413; GFX10-NEXT:    ; return to shader part epilog
414  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
415  %element = extractelement <4 x i8> %vector, i32 2
416  ret i8 %element
417}
418
; Constant-index (element 3) extractelement on a <4 x i8> loaded through an
; inreg addrspace(4) pointer, in an amdgpu_ps function.
; The expected code reassembles the loaded dword with scalar instructions and
; then shifts it right by 24 (s_lshr_b32 s0, s0, 24).
; gfx900, fiji and hawaii share one block of checks; gfx1010 has its own block.
419define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) {
420; GCN-LABEL: extractelement_sgpr_v4i8_idx3:
421; GCN:       ; %bb.0:
422; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
423; GCN-NEXT:    s_waitcnt lgkmcnt(0)
424; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
425; GCN-NEXT:    s_lshr_b32 s1, s0, 24
426; GCN-NEXT:    s_and_b32 s2, s0, 0xff
427; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
428; GCN-NEXT:    s_lshl_b32 s3, s3, 8
429; GCN-NEXT:    s_or_b32 s2, s2, s3
430; GCN-NEXT:    s_lshl_b32 s0, s0, 16
431; GCN-NEXT:    s_or_b32 s0, s2, s0
432; GCN-NEXT:    s_lshl_b32 s1, s1, 24
433; GCN-NEXT:    s_or_b32 s0, s0, s1
434; GCN-NEXT:    s_lshr_b32 s0, s0, 24
435; GCN-NEXT:    ; return to shader part epilog
436;
437; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
438; GFX10:       ; %bb.0:
439; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
440; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
441; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
442; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
443; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
444; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
445; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
446; GFX10-NEXT:    s_or_b32 s1, s1, s2
447; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
448; GFX10-NEXT:    s_or_b32 s1, s1, s3
449; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
450; GFX10-NEXT:    s_or_b32 s0, s1, s0
451; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
452; GFX10-NEXT:    ; return to shader part epilog
453  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
454  %element = extractelement <4 x i8> %vector, i32 3
455  ret i8 %element
456}
457
; Constant-index (element 0) extractelement on a <4 x i8> loaded through an
; addrspace(1) pointer, in a function with no explicit calling convention.
; Every expected sequence loads one dword, reassembles its four bytes with
; vector instructions and returns through s_setpc_b64 s[30:31]; no trailing
; shift is expected for index 0. Each of the four targets has its own checks.
458define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) {
459; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
460; GFX9:       ; %bb.0:
461; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; GFX9-NEXT:    global_load_dword v0, v[0:1], off
463; GFX9-NEXT:    v_mov_b32_e32 v2, 8
464; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
465; GFX9-NEXT:    v_mov_b32_e32 v3, 16
466; GFX9-NEXT:    s_waitcnt vmcnt(0)
467; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
468; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
469; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
470; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
471; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
472; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
473; GFX9-NEXT:    s_setpc_b64 s[30:31]
474;
475; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
476; GFX8:       ; %bb.0:
477; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478; GFX8-NEXT:    flat_load_dword v0, v[0:1]
479; GFX8-NEXT:    v_mov_b32_e32 v1, 8
480; GFX8-NEXT:    v_mov_b32_e32 v2, 16
481; GFX8-NEXT:    s_waitcnt vmcnt(0)
482; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
483; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
484; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
485; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
486; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
487; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
488; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
489; GFX8-NEXT:    s_setpc_b64 s[30:31]
490;
491; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
492; GFX7:       ; %bb.0:
493; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494; GFX7-NEXT:    s_mov_b32 s6, 0
495; GFX7-NEXT:    s_mov_b32 s7, 0xf000
496; GFX7-NEXT:    s_mov_b64 s[4:5], 0
497; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
498; GFX7-NEXT:    s_waitcnt vmcnt(0)
499; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
500; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
501; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
502; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
503; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
504; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
505; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
506; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
507; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
508; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
509; GFX7-NEXT:    s_setpc_b64 s[30:31]
510;
511; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
512; GFX10:       ; %bb.0:
513; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
514; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
515; GFX10-NEXT:    global_load_dword v0, v[0:1], off
516; GFX10-NEXT:    v_mov_b32_e32 v1, 8
517; GFX10-NEXT:    v_mov_b32_e32 v2, 16
518; GFX10-NEXT:    s_waitcnt vmcnt(0)
519; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
520; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
521; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
522; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
523; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
524; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
525; GFX10-NEXT:    s_setpc_b64 s[30:31]
526  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
527  %element = extractelement <4 x i8> %vector, i32 0
528  ret i8 %element
529}
530
; Constant-index (element 1) extractelement on a <4 x i8> loaded through an
; addrspace(1) pointer, in a function with no explicit calling convention.
; Every expected sequence reassembles the loaded dword with vector
; instructions and then shifts it right by 8 (v_lshrrev_b32_e32 v0, 8, v0).
; In the gfx900 and gfx1010 checks the byte-1 shift amount is materialised
; with s_mov_b32 s4, 8 rather than a v_mov_b32.
531define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) {
532; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
533; GFX9:       ; %bb.0:
534; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; GFX9-NEXT:    global_load_dword v0, v[0:1], off
536; GFX9-NEXT:    s_mov_b32 s4, 8
537; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
538; GFX9-NEXT:    v_mov_b32_e32 v2, 16
539; GFX9-NEXT:    s_waitcnt vmcnt(0)
540; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
541; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
542; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
543; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
544; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
545; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
546; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
547; GFX9-NEXT:    s_setpc_b64 s[30:31]
548;
549; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
550; GFX8:       ; %bb.0:
551; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552; GFX8-NEXT:    flat_load_dword v0, v[0:1]
553; GFX8-NEXT:    v_mov_b32_e32 v1, 8
554; GFX8-NEXT:    v_mov_b32_e32 v2, 16
555; GFX8-NEXT:    s_waitcnt vmcnt(0)
556; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
557; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
558; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
559; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
560; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
561; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
562; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
563; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
564; GFX8-NEXT:    s_setpc_b64 s[30:31]
565;
566; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
567; GFX7:       ; %bb.0:
568; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569; GFX7-NEXT:    s_mov_b32 s6, 0
570; GFX7-NEXT:    s_mov_b32 s7, 0xf000
571; GFX7-NEXT:    s_mov_b64 s[4:5], 0
572; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
573; GFX7-NEXT:    s_waitcnt vmcnt(0)
574; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
575; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
576; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
577; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
578; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
579; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
580; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
581; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
582; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
583; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
584; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
585; GFX7-NEXT:    s_setpc_b64 s[30:31]
586;
587; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
588; GFX10:       ; %bb.0:
589; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
591; GFX10-NEXT:    global_load_dword v0, v[0:1], off
592; GFX10-NEXT:    s_mov_b32 s4, 8
593; GFX10-NEXT:    v_mov_b32_e32 v1, 16
594; GFX10-NEXT:    s_waitcnt vmcnt(0)
595; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
596; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
597; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
598; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
599; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
600; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
601; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
602; GFX10-NEXT:    s_setpc_b64 s[30:31]
603  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
604  %element = extractelement <4 x i8> %vector, i32 1
605  ret i8 %element
606}
607
; Constant-index (element 2) extractelement on a <4 x i8> loaded through an
; addrspace(1) pointer, in a function with no explicit calling convention.
; Every expected sequence reassembles the loaded dword with vector
; instructions and then shifts it right by 16 (v_lshrrev_b32_e32 v0, 16, v0).
; In the gfx900 and gfx1010 checks the byte-2 shift amount is materialised
; with s_mov_b32 s4, 16 rather than a v_mov_b32.
608define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) {
609; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
610; GFX9:       ; %bb.0:
611; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612; GFX9-NEXT:    global_load_dword v0, v[0:1], off
613; GFX9-NEXT:    v_mov_b32_e32 v2, 8
614; GFX9-NEXT:    s_mov_b32 s4, 16
615; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
616; GFX9-NEXT:    s_waitcnt vmcnt(0)
617; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
618; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
619; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
620; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
621; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
622; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
623; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
624; GFX9-NEXT:    s_setpc_b64 s[30:31]
625;
626; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
627; GFX8:       ; %bb.0:
628; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629; GFX8-NEXT:    flat_load_dword v0, v[0:1]
630; GFX8-NEXT:    v_mov_b32_e32 v1, 8
631; GFX8-NEXT:    v_mov_b32_e32 v2, 16
632; GFX8-NEXT:    s_waitcnt vmcnt(0)
633; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
634; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
635; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
636; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
637; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
638; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
639; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
640; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
641; GFX8-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
644; GFX7:       ; %bb.0:
645; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX7-NEXT:    s_mov_b32 s6, 0
647; GFX7-NEXT:    s_mov_b32 s7, 0xf000
648; GFX7-NEXT:    s_mov_b64 s[4:5], 0
649; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
650; GFX7-NEXT:    s_waitcnt vmcnt(0)
651; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
652; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
653; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
654; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
655; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
656; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
657; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
658; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
659; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
660; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
661; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
662; GFX7-NEXT:    s_setpc_b64 s[30:31]
663;
664; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
665; GFX10:       ; %bb.0:
666; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
668; GFX10-NEXT:    global_load_dword v0, v[0:1], off
669; GFX10-NEXT:    v_mov_b32_e32 v1, 8
670; GFX10-NEXT:    s_mov_b32 s4, 16
671; GFX10-NEXT:    s_waitcnt vmcnt(0)
672; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
673; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
674; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
675; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
676; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
677; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
678; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
679; GFX10-NEXT:    s_setpc_b64 s[30:31]
680  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
681  %element = extractelement <4 x i8> %vector, i32 2
682  ret i8 %element
683}
684
; Constant-index case: load a <4 x i8> through a global (addrspace(1)) pointer
; and return element 3. In every checked output the four bytes are repacked
; into one 32-bit value and the element is produced by a final right shift
; of 24 bits.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
685define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) {
686; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
687; GFX9:       ; %bb.0:
688; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689; GFX9-NEXT:    global_load_dword v0, v[0:1], off
690; GFX9-NEXT:    v_mov_b32_e32 v2, 8
691; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
692; GFX9-NEXT:    v_mov_b32_e32 v3, 16
693; GFX9-NEXT:    s_waitcnt vmcnt(0)
694; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
695; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
696; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
697; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
698; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
699; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
700; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
701; GFX9-NEXT:    s_setpc_b64 s[30:31]
702;
703; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
704; GFX8:       ; %bb.0:
705; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
706; GFX8-NEXT:    flat_load_dword v0, v[0:1]
707; GFX8-NEXT:    v_mov_b32_e32 v1, 8
708; GFX8-NEXT:    v_mov_b32_e32 v2, 16
709; GFX8-NEXT:    s_waitcnt vmcnt(0)
710; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
711; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
712; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
713; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
714; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
715; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
716; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
717; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
718; GFX8-NEXT:    s_setpc_b64 s[30:31]
719;
720; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
721; GFX7:       ; %bb.0:
722; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723; GFX7-NEXT:    s_mov_b32 s6, 0
724; GFX7-NEXT:    s_mov_b32 s7, 0xf000
725; GFX7-NEXT:    s_mov_b64 s[4:5], 0
726; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
727; GFX7-NEXT:    s_waitcnt vmcnt(0)
728; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
729; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
730; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
731; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
732; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
733; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
734; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
735; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
736; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
737; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
738; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
739; GFX7-NEXT:    s_setpc_b64 s[30:31]
740;
741; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
742; GFX10:       ; %bb.0:
743; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
745; GFX10-NEXT:    global_load_dword v0, v[0:1], off
746; GFX10-NEXT:    v_mov_b32_e32 v1, 8
747; GFX10-NEXT:    v_mov_b32_e32 v2, 16
748; GFX10-NEXT:    s_waitcnt vmcnt(0)
749; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
750; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
751; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
752; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
753; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
754; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
755; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
756; GFX10-NEXT:    s_setpc_b64 s[30:31]
757  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
758  %element = extractelement <4 x i8> %vector, i32 3
759  ret i8 %element
760}
761
; Dynamic-index case for <8 x i8>: both the addrspace(4) pointer and the index
; are `inreg` arguments of an amdgpu_ps function. The checked output loads the
; vector as two dwords from s[2:3] and reads the index from s4. Each dword is
; repacked byte by byte, then (idx >> 2) == 1 selects the second dword via
; s_cselect_b32, and the chosen dword is shifted right by (idx & 3) << 3 bits.
; The whole sequence stays in scalar instructions.
762define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
763; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
764; GCN:       ; %bb.0:
765; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
766; GCN-NEXT:    s_mov_b32 s7, 0x80008
767; GCN-NEXT:    s_movk_i32 s5, 0xff
768; GCN-NEXT:    s_waitcnt lgkmcnt(0)
769; GCN-NEXT:    s_bfe_u32 s8, s0, s7
770; GCN-NEXT:    s_and_b32 s6, s0, s5
771; GCN-NEXT:    s_lshl_b32 s8, s8, 8
772; GCN-NEXT:    s_or_b32 s6, s6, s8
773; GCN-NEXT:    s_mov_b32 s8, 0x80010
774; GCN-NEXT:    s_lshr_b32 s2, s0, 24
775; GCN-NEXT:    s_bfe_u32 s0, s0, s8
776; GCN-NEXT:    s_lshl_b32 s0, s0, 16
777; GCN-NEXT:    s_or_b32 s0, s6, s0
778; GCN-NEXT:    s_lshl_b32 s2, s2, 24
779; GCN-NEXT:    s_or_b32 s0, s0, s2
780; GCN-NEXT:    s_and_b32 s2, s1, s5
781; GCN-NEXT:    s_bfe_u32 s5, s1, s7
782; GCN-NEXT:    s_lshr_b32 s3, s1, 24
783; GCN-NEXT:    s_lshl_b32 s5, s5, 8
784; GCN-NEXT:    s_bfe_u32 s1, s1, s8
785; GCN-NEXT:    s_or_b32 s2, s2, s5
786; GCN-NEXT:    s_lshl_b32 s1, s1, 16
787; GCN-NEXT:    s_or_b32 s1, s2, s1
788; GCN-NEXT:    s_lshl_b32 s2, s3, 24
789; GCN-NEXT:    s_or_b32 s1, s1, s2
790; GCN-NEXT:    s_lshr_b32 s2, s4, 2
791; GCN-NEXT:    s_cmp_eq_u32 s2, 1
792; GCN-NEXT:    s_cselect_b32 s0, s1, s0
793; GCN-NEXT:    s_and_b32 s1, s4, 3
794; GCN-NEXT:    s_lshl_b32 s1, s1, 3
795; GCN-NEXT:    s_lshr_b32 s0, s0, s1
796; GCN-NEXT:    ; return to shader part epilog
797;
798; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
799; GFX10:       ; %bb.0:
800; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
801; GFX10-NEXT:    s_mov_b32 s3, 0x80008
802; GFX10-NEXT:    s_movk_i32 s2, 0xff
803; GFX10-NEXT:    s_mov_b32 s5, 0x80010
804; GFX10-NEXT:    s_lshr_b32 s6, s4, 2
805; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
806; GFX10-NEXT:    s_bfe_u32 s10, s0, s3
807; GFX10-NEXT:    s_bfe_u32 s3, s1, s3
808; GFX10-NEXT:    s_lshr_b32 s7, s0, 24
809; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
810; GFX10-NEXT:    s_and_b32 s9, s0, s2
811; GFX10-NEXT:    s_bfe_u32 s0, s0, s5
812; GFX10-NEXT:    s_and_b32 s2, s1, s2
813; GFX10-NEXT:    s_bfe_u32 s1, s1, s5
814; GFX10-NEXT:    s_lshl_b32 s5, s10, 8
815; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
816; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
817; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
818; GFX10-NEXT:    s_or_b32 s5, s9, s5
819; GFX10-NEXT:    s_or_b32 s2, s2, s3
820; GFX10-NEXT:    s_lshl_b32 s7, s7, 24
821; GFX10-NEXT:    s_lshl_b32 s8, s8, 24
822; GFX10-NEXT:    s_or_b32 s0, s5, s0
823; GFX10-NEXT:    s_or_b32 s1, s2, s1
824; GFX10-NEXT:    s_or_b32 s0, s0, s7
825; GFX10-NEXT:    s_or_b32 s1, s1, s8
826; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
827; GFX10-NEXT:    s_cselect_b32 s0, s1, s0
828; GFX10-NEXT:    s_and_b32 s1, s4, 3
829; GFX10-NEXT:    s_lshl_b32 s1, s1, 3
830; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
831; GFX10-NEXT:    ; return to shader part epilog
832  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
833  %element = extractelement <8 x i8> %vector, i32 %idx
834  ret i8 %element
835}
836
; Dynamic-index case for <8 x i8> with a non-`inreg` addrspace(1) pointer and
; an `inreg` index in an amdgpu_ps function. The checked output loads the two
; dwords through v[0:1] and reads the index from s2. The index arithmetic
; (idx >> 2, idx & 3, << 3) uses scalar instructions, while the byte repacking,
; the v_cndmask_b32 dword select and the final shift use vector instructions.
; The i8 result is copied to s0 with v_readfirstlane_b32.
837define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
838; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
839; GFX9:       ; %bb.0:
840; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
841; GFX9-NEXT:    s_mov_b32 s0, 8
842; GFX9-NEXT:    s_mov_b32 s1, 16
843; GFX9-NEXT:    s_movk_i32 s3, 0xff
844; GFX9-NEXT:    s_lshr_b32 s4, s2, 2
845; GFX9-NEXT:    s_and_b32 s2, s2, 3
846; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 1
847; GFX9-NEXT:    s_waitcnt vmcnt(0)
848; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
849; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
850; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
851; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
852; GFX9-NEXT:    v_lshlrev_b32_sdwa v5, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
853; GFX9-NEXT:    v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
854; GFX9-NEXT:    v_and_or_b32 v0, v0, s3, v4
855; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
856; GFX9-NEXT:    v_and_or_b32 v1, v1, s3, v6
857; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
858; GFX9-NEXT:    v_or3_b32 v0, v0, v5, v2
859; GFX9-NEXT:    v_or3_b32 v1, v1, v7, v3
860; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
861; GFX9-NEXT:    s_lshl_b32 s0, s2, 3
862; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
863; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
864; GFX9-NEXT:    ; return to shader part epilog
865;
866; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
867; GFX8:       ; %bb.0:
868; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
869; GFX8-NEXT:    v_mov_b32_e32 v2, 8
870; GFX8-NEXT:    v_mov_b32_e32 v3, 16
871; GFX8-NEXT:    s_lshr_b32 s0, s2, 2
872; GFX8-NEXT:    s_and_b32 s1, s2, 3
873; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
874; GFX8-NEXT:    s_lshl_b32 s0, s1, 3
875; GFX8-NEXT:    s_waitcnt vmcnt(0)
876; GFX8-NEXT:    v_lshlrev_b32_sdwa v6, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
877; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
878; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
879; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
880; GFX8-NEXT:    v_lshlrev_b32_sdwa v7, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
881; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
882; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
883; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
884; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
885; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 24, v5
886; GFX8-NEXT:    v_or_b32_e32 v0, v0, v7
887; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
888; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
889; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
890; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
891; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
892; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
893; GFX8-NEXT:    ; return to shader part epilog
894;
895; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
896; GFX7:       ; %bb.0:
897; GFX7-NEXT:    s_mov_b32 s6, 0
898; GFX7-NEXT:    s_mov_b32 s7, 0xf000
899; GFX7-NEXT:    s_mov_b64 s[4:5], 0
900; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
901; GFX7-NEXT:    s_movk_i32 s0, 0xff
902; GFX7-NEXT:    s_lshr_b32 s1, s2, 2
903; GFX7-NEXT:    s_and_b32 s2, s2, 3
904; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 1
905; GFX7-NEXT:    s_waitcnt vmcnt(0)
906; GFX7-NEXT:    v_bfe_u32 v5, v0, 8, 8
907; GFX7-NEXT:    v_bfe_u32 v7, v1, 8, 8
908; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
909; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
910; GFX7-NEXT:    v_and_b32_e32 v4, s0, v0
911; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
912; GFX7-NEXT:    v_and_b32_e32 v6, s0, v1
913; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
914; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
915; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 8, v7
916; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
917; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
918; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
919; GFX7-NEXT:    v_or_b32_e32 v5, v6, v7
920; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
921; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
922; GFX7-NEXT:    v_or_b32_e32 v0, v4, v0
923; GFX7-NEXT:    v_or_b32_e32 v1, v5, v1
924; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
925; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
926; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
927; GFX7-NEXT:    s_lshl_b32 s0, s2, 3
928; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
929; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
930; GFX7-NEXT:    ; return to shader part epilog
931;
932; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
933; GFX10:       ; %bb.0:
934; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
935; GFX10-NEXT:    s_mov_b32 s0, 8
936; GFX10-NEXT:    s_mov_b32 s1, 16
937; GFX10-NEXT:    s_movk_i32 s3, 0xff
938; GFX10-NEXT:    s_waitcnt vmcnt(0)
939; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
940; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
941; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
942; GFX10-NEXT:    v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
943; GFX10-NEXT:    v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
944; GFX10-NEXT:    v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
945; GFX10-NEXT:    v_and_or_b32 v0, v0, s3, v3
946; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
947; GFX10-NEXT:    v_and_or_b32 v1, v1, s3, v5
948; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
949; GFX10-NEXT:    s_lshr_b32 s0, s2, 2
950; GFX10-NEXT:    v_or3_b32 v0, v0, v6, v2
951; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 1
952; GFX10-NEXT:    v_or3_b32 v1, v1, v7, v3
953; GFX10-NEXT:    s_and_b32 s0, s2, 3
954; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
955; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
956; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
957; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
958; GFX10-NEXT:    ; return to shader part epilog
959  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
960  %element = extractelement <8 x i8> %vector, i32 %idx
961  ret i8 %element
962}
963
; Dynamic-index case for <8 x i8> in a function with no explicit calling
; convention and no `inreg` arguments. The checked output loads the two dwords
; through v[0:1] and reads the index from v2. Everything, including the index
; arithmetic (idx >> 2 compared with 1, (idx & 3) << 3 as shift amount), is
; done with vector instructions, and the function returns via s_setpc_b64.
964define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) {
965; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
966; GFX9:       ; %bb.0:
967; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
969; GFX9-NEXT:    s_mov_b32 s4, 8
970; GFX9-NEXT:    s_mov_b32 s5, 16
971; GFX9-NEXT:    s_movk_i32 s6, 0xff
972; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 2, v2
973; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
974; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
975; GFX9-NEXT:    s_waitcnt vmcnt(0)
976; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
977; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
978; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
979; GFX9-NEXT:    v_lshlrev_b32_sdwa v8, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
980; GFX9-NEXT:    v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
981; GFX9-NEXT:    v_lshlrev_b32_sdwa v9, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
982; GFX9-NEXT:    v_and_or_b32 v0, v0, s6, v6
983; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
984; GFX9-NEXT:    v_and_or_b32 v1, v1, s6, v8
985; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
986; GFX9-NEXT:    v_or3_b32 v0, v0, v7, v4
987; GFX9-NEXT:    v_or3_b32 v1, v1, v9, v5
988; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
989; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
990; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
991; GFX9-NEXT:    s_setpc_b64 s[30:31]
992;
993; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
994; GFX8:       ; %bb.0:
995; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
997; GFX8-NEXT:    v_mov_b32_e32 v3, 8
998; GFX8-NEXT:    v_mov_b32_e32 v4, 16
999; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 2, v2
1000; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
1001; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v5
1002; GFX8-NEXT:    s_waitcnt vmcnt(0)
1003; GFX8-NEXT:    v_lshlrev_b32_sdwa v8, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1004; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1005; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 24, v0
1006; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
1007; GFX8-NEXT:    v_lshlrev_b32_sdwa v9, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1008; GFX8-NEXT:    v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1009; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1010; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1011; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
1012; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v7
1013; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
1014; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
1015; GFX8-NEXT:    v_or_b32_e32 v0, v0, v6
1016; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
1017; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1018; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1019; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1020; GFX8-NEXT:    s_setpc_b64 s[30:31]
1021;
1022; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
1023; GFX7:       ; %bb.0:
1024; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1025; GFX7-NEXT:    s_mov_b32 s6, 0
1026; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1027; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1028; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1029; GFX7-NEXT:    s_movk_i32 s4, 0xff
1030; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 2, v2
1031; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
1032; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
1033; GFX7-NEXT:    s_waitcnt vmcnt(0)
1034; GFX7-NEXT:    v_bfe_u32 v7, v0, 8, 8
1035; GFX7-NEXT:    v_bfe_u32 v9, v1, 8, 8
1036; GFX7-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1037; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
1038; GFX7-NEXT:    v_and_b32_e32 v6, s4, v0
1039; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
1040; GFX7-NEXT:    v_and_b32_e32 v8, s4, v1
1041; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
1042; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 8, v7
1043; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
1044; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1045; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1046; GFX7-NEXT:    v_or_b32_e32 v6, v6, v7
1047; GFX7-NEXT:    v_or_b32_e32 v7, v8, v9
1048; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
1049; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
1050; GFX7-NEXT:    v_or_b32_e32 v0, v6, v0
1051; GFX7-NEXT:    v_or_b32_e32 v1, v7, v1
1052; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
1053; GFX7-NEXT:    v_or_b32_e32 v1, v1, v5
1054; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1055; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1056; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1057; GFX7-NEXT:    s_setpc_b64 s[30:31]
1058;
1059; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
1060; GFX10:       ; %bb.0:
1061; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1063; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1064; GFX10-NEXT:    s_mov_b32 s4, 8
1065; GFX10-NEXT:    s_mov_b32 s5, 16
1066; GFX10-NEXT:    s_movk_i32 s6, 0xff
1067; GFX10-NEXT:    s_waitcnt vmcnt(0)
1068; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1069; GFX10-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1070; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
1071; GFX10-NEXT:    v_lshlrev_b32_sdwa v6, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1072; GFX10-NEXT:    v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1073; GFX10-NEXT:    v_lshlrev_b32_sdwa v8, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1074; GFX10-NEXT:    v_and_or_b32 v0, v0, s6, v4
1075; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1076; GFX10-NEXT:    v_and_or_b32 v1, v1, s6, v6
1077; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
1078; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 2, v2
1079; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
1080; GFX10-NEXT:    v_or3_b32 v0, v0, v7, v3
1081; GFX10-NEXT:    v_or3_b32 v1, v1, v8, v4
1082; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
1083; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1084; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1085; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1086; GFX10-NEXT:    s_setpc_b64 s[30:31]
1087  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1088  %element = extractelement <8 x i8> %vector, i32 %idx
1089  ret i8 %element
1090}
1091
; Dynamic-index case for <8 x i8> with an `inreg` addrspace(4) pointer and a
; non-`inreg` index in an amdgpu_ps function. The checked output loads the two
; dwords with s_load_dwordx2 from s[2:3] and repacks them with scalar
; instructions, while the index is read from v0. The dword select
; (v_cndmask_b32) and the final shift by (idx & 3) << 3 are vector
; instructions, and the result is copied to s0 with v_readfirstlane_b32.
1092define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
1093; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
1094; GCN:       ; %bb.0:
1095; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1096; GCN-NEXT:    s_mov_b32 s6, 0x80008
1097; GCN-NEXT:    s_movk_i32 s4, 0xff
1098; GCN-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
1099; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
1100; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1101; GCN-NEXT:    s_bfe_u32 s7, s0, s6
1102; GCN-NEXT:    s_and_b32 s5, s0, s4
1103; GCN-NEXT:    s_lshl_b32 s7, s7, 8
1104; GCN-NEXT:    s_or_b32 s5, s5, s7
1105; GCN-NEXT:    s_mov_b32 s7, 0x80010
1106; GCN-NEXT:    s_lshr_b32 s2, s0, 24
1107; GCN-NEXT:    s_bfe_u32 s0, s0, s7
1108; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1109; GCN-NEXT:    s_or_b32 s0, s5, s0
1110; GCN-NEXT:    s_lshl_b32 s2, s2, 24
1111; GCN-NEXT:    s_or_b32 s0, s0, s2
1112; GCN-NEXT:    s_and_b32 s2, s1, s4
1113; GCN-NEXT:    s_bfe_u32 s4, s1, s6
1114; GCN-NEXT:    s_lshr_b32 s3, s1, 24
1115; GCN-NEXT:    s_lshl_b32 s4, s4, 8
1116; GCN-NEXT:    s_bfe_u32 s1, s1, s7
1117; GCN-NEXT:    s_or_b32 s2, s2, s4
1118; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1119; GCN-NEXT:    s_or_b32 s1, s2, s1
1120; GCN-NEXT:    s_lshl_b32 s2, s3, 24
1121; GCN-NEXT:    s_or_b32 s1, s1, s2
1122; GCN-NEXT:    v_mov_b32_e32 v2, s0
1123; GCN-NEXT:    v_mov_b32_e32 v3, s1
1124; GCN-NEXT:    v_and_b32_e32 v0, 3, v0
1125; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
1126; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1127; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
1128; GCN-NEXT:    v_readfirstlane_b32 s0, v0
1129; GCN-NEXT:    ; return to shader part epilog
1130;
1131; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
1132; GFX10:       ; %bb.0:
1133; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1134; GFX10-NEXT:    s_mov_b32 s3, 0x80008
1135; GFX10-NEXT:    s_movk_i32 s2, 0xff
1136; GFX10-NEXT:    s_mov_b32 s4, 0x80010
1137; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
1138; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
1139; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
1140; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1141; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1142; GFX10-NEXT:    s_bfe_u32 s8, s0, s3
1143; GFX10-NEXT:    s_bfe_u32 s3, s1, s3
1144; GFX10-NEXT:    s_lshr_b32 s6, s1, 24
1145; GFX10-NEXT:    s_and_b32 s7, s0, s2
1146; GFX10-NEXT:    s_and_b32 s2, s1, s2
1147; GFX10-NEXT:    s_bfe_u32 s1, s1, s4
1148; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
1149; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
1150; GFX10-NEXT:    s_or_b32 s2, s2, s3
1151; GFX10-NEXT:    s_lshl_b32 s3, s6, 24
1152; GFX10-NEXT:    s_or_b32 s1, s2, s1
1153; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
1154; GFX10-NEXT:    s_bfe_u32 s0, s0, s4
1155; GFX10-NEXT:    s_lshl_b32 s4, s8, 8
1156; GFX10-NEXT:    s_or_b32 s1, s1, s3
1157; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
1158; GFX10-NEXT:    s_or_b32 s3, s7, s4
1159; GFX10-NEXT:    v_mov_b32_e32 v2, s1
1160; GFX10-NEXT:    s_lshl_b32 s2, s5, 24
1161; GFX10-NEXT:    s_or_b32 s0, s3, s0
1162; GFX10-NEXT:    s_or_b32 s0, s0, s2
1163; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v2, vcc_lo
1164; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
1165; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1166; GFX10-NEXT:    ; return to shader part epilog
1167  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1168  %element = extractelement <8 x i8> %vector, i32 %idx
1169  ret i8 %element
1170}
1171
; Constant-index case: element 0 of an <8 x i8> loaded through an `inreg`
; addrspace(4) pointer. The checked output still loads both dwords but only
; repacks the first one (s0); no final shift is emitted for index 0.
1172define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) {
1173; GCN-LABEL: extractelement_sgpr_v8i8_idx0:
1174; GCN:       ; %bb.0:
1175; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1176; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1177; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1178; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1179; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1180; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1181; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1182; GCN-NEXT:    s_or_b32 s2, s2, s3
1183; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1184; GCN-NEXT:    s_or_b32 s0, s2, s0
1185; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1186; GCN-NEXT:    s_or_b32 s0, s0, s1
1187; GCN-NEXT:    ; return to shader part epilog
1188;
1189; GFX10-LABEL: extractelement_sgpr_v8i8_idx0:
1190; GFX10:       ; %bb.0:
1191; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1192; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1193; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1194; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1195; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1196; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1197; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1198; GFX10-NEXT:    s_or_b32 s1, s1, s2
1199; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1200; GFX10-NEXT:    s_or_b32 s1, s1, s3
1201; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1202; GFX10-NEXT:    s_or_b32 s0, s1, s0
1203; GFX10-NEXT:    ; return to shader part epilog
1204  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1205  %element = extractelement <8 x i8> %vector, i32 0
1206  ret i8 %element
1207}
1208
; Constant-index case: element 1 of an <8 x i8> loaded through an `inreg`
; addrspace(4) pointer. The checked output repacks the first loaded dword (s0)
; and finishes with a right shift of 8 bits.
1209define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) {
1210; GCN-LABEL: extractelement_sgpr_v8i8_idx1:
1211; GCN:       ; %bb.0:
1212; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1213; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1214; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1215; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1216; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1217; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1218; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1219; GCN-NEXT:    s_or_b32 s2, s2, s3
1220; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1221; GCN-NEXT:    s_or_b32 s0, s2, s0
1222; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1223; GCN-NEXT:    s_or_b32 s0, s0, s1
1224; GCN-NEXT:    s_lshr_b32 s0, s0, 8
1225; GCN-NEXT:    ; return to shader part epilog
1226;
1227; GFX10-LABEL: extractelement_sgpr_v8i8_idx1:
1228; GFX10:       ; %bb.0:
1229; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1230; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1231; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1232; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1233; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1234; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1235; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1236; GFX10-NEXT:    s_or_b32 s1, s1, s2
1237; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1238; GFX10-NEXT:    s_or_b32 s1, s1, s3
1239; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1240; GFX10-NEXT:    s_or_b32 s0, s1, s0
1241; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
1242; GFX10-NEXT:    ; return to shader part epilog
1243  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1244  %element = extractelement <8 x i8> %vector, i32 1
1245  ret i8 %element
1246}
1247
; Constant-index case: element 2 of an <8 x i8> loaded through an `inreg`
; addrspace(4) pointer. The checked output repacks the first loaded dword (s0)
; and finishes with a right shift of 16 bits.
1248define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) {
1249; GCN-LABEL: extractelement_sgpr_v8i8_idx2:
1250; GCN:       ; %bb.0:
1251; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1252; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1253; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1254; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1255; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1256; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1257; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1258; GCN-NEXT:    s_or_b32 s2, s2, s3
1259; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1260; GCN-NEXT:    s_or_b32 s0, s2, s0
1261; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1262; GCN-NEXT:    s_or_b32 s0, s0, s1
1263; GCN-NEXT:    s_lshr_b32 s0, s0, 16
1264; GCN-NEXT:    ; return to shader part epilog
1265;
1266; GFX10-LABEL: extractelement_sgpr_v8i8_idx2:
1267; GFX10:       ; %bb.0:
1268; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1269; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1270; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1271; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1272; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1273; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1274; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1275; GFX10-NEXT:    s_or_b32 s1, s1, s2
1276; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1277; GFX10-NEXT:    s_or_b32 s1, s1, s3
1278; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1279; GFX10-NEXT:    s_or_b32 s0, s1, s0
1280; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
1281; GFX10-NEXT:    ; return to shader part epilog
1282  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1283  %element = extractelement <8 x i8> %vector, i32 2
1284  ret i8 %element
1285}
1286
; Constant-index case: element 3 of an <8 x i8> loaded through an `inreg`
; addrspace(4) pointer. The checked output repacks the first loaded dword (s0)
; and finishes with a right shift of 24 bits.
1287define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) {
1288; GCN-LABEL: extractelement_sgpr_v8i8_idx3:
1289; GCN:       ; %bb.0:
1290; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1291; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1292; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1293; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1294; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1295; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1296; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1297; GCN-NEXT:    s_or_b32 s2, s2, s3
1298; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1299; GCN-NEXT:    s_or_b32 s0, s2, s0
1300; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1301; GCN-NEXT:    s_or_b32 s0, s0, s1
1302; GCN-NEXT:    s_lshr_b32 s0, s0, 24
1303; GCN-NEXT:    ; return to shader part epilog
1304;
1305; GFX10-LABEL: extractelement_sgpr_v8i8_idx3:
1306; GFX10:       ; %bb.0:
1307; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1308; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1309; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1310; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1311; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1312; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1313; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1314; GFX10-NEXT:    s_or_b32 s1, s1, s2
1315; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1316; GFX10-NEXT:    s_or_b32 s1, s1, s3
1317; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1318; GFX10-NEXT:    s_or_b32 s0, s1, s0
1319; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1320; GFX10-NEXT:    ; return to shader part epilog
1321  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1322  %element = extractelement <8 x i8> %vector, i32 3
1323  ret i8 %element
1324}
1325
; Constant-index case: element 4 of an <8 x i8> loaded through an `inreg`
; addrspace(4) pointer. Index 4 is the first byte of the second loaded dword,
; so the checked output repacks s1 instead of s0 and emits no final shift.
1326define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) {
1327; GCN-LABEL: extractelement_sgpr_v8i8_idx4:
1328; GCN:       ; %bb.0:
1329; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1330; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1331; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1332; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1333; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1334; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1335; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1336; GCN-NEXT:    s_or_b32 s2, s2, s3
1337; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1338; GCN-NEXT:    s_or_b32 s1, s2, s1
1339; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1340; GCN-NEXT:    s_or_b32 s0, s1, s0
1341; GCN-NEXT:    ; return to shader part epilog
1342;
1343; GFX10-LABEL: extractelement_sgpr_v8i8_idx4:
1344; GFX10:       ; %bb.0:
1345; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1346; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1347; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1348; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1349; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1350; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1351; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1352; GFX10-NEXT:    s_or_b32 s0, s0, s2
1353; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1354; GFX10-NEXT:    s_or_b32 s0, s0, s3
1355; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1356; GFX10-NEXT:    s_or_b32 s0, s0, s1
1357; GFX10-NEXT:    ; return to shader part epilog
1358  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1359  %element = extractelement <8 x i8> %vector, i32 4
1360  ret i8 %element
1361}
1362
; Constant-index case: element 5 of an <8 x i8> loaded through an `inreg`
; addrspace(4) pointer. The checked output repacks the second loaded dword
; (s1) and finishes with a right shift of 8 bits.
1363define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) {
1364; GCN-LABEL: extractelement_sgpr_v8i8_idx5:
1365; GCN:       ; %bb.0:
1366; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1367; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1368; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1369; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1370; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1371; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1372; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1373; GCN-NEXT:    s_or_b32 s2, s2, s3
1374; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1375; GCN-NEXT:    s_or_b32 s1, s2, s1
1376; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1377; GCN-NEXT:    s_or_b32 s0, s1, s0
1378; GCN-NEXT:    s_lshr_b32 s0, s0, 8
1379; GCN-NEXT:    ; return to shader part epilog
1380;
1381; GFX10-LABEL: extractelement_sgpr_v8i8_idx5:
1382; GFX10:       ; %bb.0:
1383; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1384; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1385; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1386; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1387; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1388; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1389; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1390; GFX10-NEXT:    s_or_b32 s0, s0, s2
1391; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1392; GFX10-NEXT:    s_or_b32 s0, s0, s3
1393; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1394; GFX10-NEXT:    s_or_b32 s0, s0, s1
1395; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
1396; GFX10-NEXT:    ; return to shader part epilog
1397  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1398  %element = extractelement <8 x i8> %vector, i32 5
1399  ret i8 %element
1400}
1401
; Constant-index extract of element 6 from an <8 x i8> vector loaded through
; an inreg addrspace(4) pointer in an amdgpu_ps function.
; Expected code: one s_load_dwordx2, then the four bytes of the second loaded
; dword (s1) are separated and OR-ed back together, and a final s_lshr_b32 by
; 16 moves element 6 (byte 2 of that dword) into the low bits of s0.
1402define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) {
1403; GCN-LABEL: extractelement_sgpr_v8i8_idx6:
1404; GCN:       ; %bb.0:
1405; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1406; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1407; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1408; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1409; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1410; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1411; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1412; GCN-NEXT:    s_or_b32 s2, s2, s3
1413; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1414; GCN-NEXT:    s_or_b32 s1, s2, s1
1415; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1416; GCN-NEXT:    s_or_b32 s0, s1, s0
1417; GCN-NEXT:    s_lshr_b32 s0, s0, 16
1418; GCN-NEXT:    ; return to shader part epilog
1419;
1420; GFX10-LABEL: extractelement_sgpr_v8i8_idx6:
1421; GFX10:       ; %bb.0:
1422; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1423; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1424; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1425; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1426; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1427; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1428; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1429; GFX10-NEXT:    s_or_b32 s0, s0, s2
1430; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1431; GFX10-NEXT:    s_or_b32 s0, s0, s3
1432; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1433; GFX10-NEXT:    s_or_b32 s0, s0, s1
1434; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
1435; GFX10-NEXT:    ; return to shader part epilog
1436  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1437  %element = extractelement <8 x i8> %vector, i32 6
1438  ret i8 %element
1439}
1440
; Constant-index extract of element 7 from an <8 x i8> vector loaded through
; an inreg addrspace(4) pointer in an amdgpu_ps function.
; Expected code: one s_load_dwordx2, then the four bytes of the second loaded
; dword (s1) are separated and OR-ed back together, and a final s_lshr_b32 by
; 24 moves element 7 (byte 3 of that dword) into the low bits of s0.
1441define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) {
1442; GCN-LABEL: extractelement_sgpr_v8i8_idx7:
1443; GCN:       ; %bb.0:
1444; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1445; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1446; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1447; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1448; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1449; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1450; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1451; GCN-NEXT:    s_or_b32 s2, s2, s3
1452; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1453; GCN-NEXT:    s_or_b32 s1, s2, s1
1454; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1455; GCN-NEXT:    s_or_b32 s0, s1, s0
1456; GCN-NEXT:    s_lshr_b32 s0, s0, 24
1457; GCN-NEXT:    ; return to shader part epilog
1458;
1459; GFX10-LABEL: extractelement_sgpr_v8i8_idx7:
1460; GFX10:       ; %bb.0:
1461; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1462; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1463; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1464; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1465; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1466; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1467; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1468; GFX10-NEXT:    s_or_b32 s0, s0, s2
1469; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1470; GFX10-NEXT:    s_or_b32 s0, s0, s3
1471; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1472; GFX10-NEXT:    s_or_b32 s0, s0, s1
1473; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1474; GFX10-NEXT:    ; return to shader part epilog
1475  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1476  %element = extractelement <8 x i8> %vector, i32 7
1477  ret i8 %element
1478}
1479
; Constant-index extract of element 0 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; The per-target checks differ in the load used (global_load on GFX9/GFX10,
; flat_load on GFX8, buffer_load with addr64 on GFX7) and in how the bytes
; are re-packed (SDWA shifts vs. v_bfe_u32). All of them rebuild the first
; loaded dword (v0); element 0 is already its low byte, so no trailing
; right shift is expected before the s_setpc_b64 return.
1480define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) {
1481; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
1482; GFX9:       ; %bb.0:
1483; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1484; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1485; GFX9-NEXT:    v_mov_b32_e32 v2, 8
1486; GFX9-NEXT:    s_waitcnt vmcnt(0)
1487; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
1488; GFX9-NEXT:    v_mov_b32_e32 v3, 16
1489; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1490; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1491; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1492; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
1493; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1494; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1495; GFX9-NEXT:    s_setpc_b64 s[30:31]
1496;
1497; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
1498; GFX8:       ; %bb.0:
1499; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1500; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1501; GFX8-NEXT:    s_waitcnt vmcnt(0)
1502; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1503; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1504; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1505; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1506; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1507; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1508; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1509; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1510; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1511; GFX8-NEXT:    s_setpc_b64 s[30:31]
1512;
1513; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
1514; GFX7:       ; %bb.0:
1515; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1516; GFX7-NEXT:    s_mov_b32 s6, 0
1517; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1518; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1519; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1520; GFX7-NEXT:    s_waitcnt vmcnt(0)
1521; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
1522; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
1523; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
1524; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
1525; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1526; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1527; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1528; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
1529; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1530; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1531; GFX7-NEXT:    s_setpc_b64 s[30:31]
1532;
1533; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
1534; GFX10:       ; %bb.0:
1535; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1536; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1537; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1538; GFX10-NEXT:    s_waitcnt vmcnt(0)
1539; GFX10-NEXT:    v_mov_b32_e32 v1, 8
1540; GFX10-NEXT:    v_mov_b32_e32 v2, 16
1541; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1542; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1543; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1544; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
1545; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1546; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
1547; GFX10-NEXT:    s_setpc_b64 s[30:31]
1548  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1549  %element = extractelement <8 x i8> %vector, i32 0
1550  ret i8 %element
1551}
1552
; Constant-index extract of element 1 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; Every target rebuilds the first loaded dword (v0) from its bytes and then
; expects a trailing v_lshrrev_b32 by 8 so element 1 ends up in the low bits
; of v0. On GFX9 and GFX10 the shift amount 8 used for the byte-1 SDWA shift
; is held in s4 rather than in a VGPR.
1553define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) {
1554; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
1555; GFX9:       ; %bb.0:
1556; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1557; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1558; GFX9-NEXT:    s_mov_b32 s4, 8
1559; GFX9-NEXT:    s_waitcnt vmcnt(0)
1560; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
1561; GFX9-NEXT:    v_mov_b32_e32 v2, 16
1562; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1563; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1564; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1565; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
1566; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1567; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
1568; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1569; GFX9-NEXT:    s_setpc_b64 s[30:31]
1570;
1571; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
1572; GFX8:       ; %bb.0:
1573; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1575; GFX8-NEXT:    s_waitcnt vmcnt(0)
1576; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1577; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1578; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1579; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1580; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1581; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1582; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1583; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1584; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1585; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1586; GFX8-NEXT:    s_setpc_b64 s[30:31]
1587;
1588; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
1589; GFX7:       ; %bb.0:
1590; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1591; GFX7-NEXT:    s_mov_b32 s6, 0
1592; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1593; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1594; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1595; GFX7-NEXT:    s_waitcnt vmcnt(0)
1596; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
1597; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
1598; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
1599; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
1600; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1601; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1602; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1603; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
1604; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1605; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1606; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1607; GFX7-NEXT:    s_setpc_b64 s[30:31]
1608;
1609; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
1610; GFX10:       ; %bb.0:
1611; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1612; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1613; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1614; GFX10-NEXT:    s_mov_b32 s4, 8
1615; GFX10-NEXT:    s_waitcnt vmcnt(0)
1616; GFX10-NEXT:    v_mov_b32_e32 v1, 16
1617; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1618; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1619; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1620; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
1621; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1622; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
1623; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1624; GFX10-NEXT:    s_setpc_b64 s[30:31]
1625  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1626  %element = extractelement <8 x i8> %vector, i32 1
1627  ret i8 %element
1628}
1629
; Constant-index extract of element 2 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; Every target rebuilds the first loaded dword (v0) from its bytes and then
; expects a trailing v_lshrrev_b32 by 16 so element 2 ends up in the low bits
; of v0. On GFX9 and GFX10 the shift amount 16 used for the byte-2 SDWA shift
; is held in s4 rather than in a VGPR.
1630define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) {
1631; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
1632; GFX9:       ; %bb.0:
1633; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1634; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1635; GFX9-NEXT:    v_mov_b32_e32 v2, 8
1636; GFX9-NEXT:    s_mov_b32 s4, 16
1637; GFX9-NEXT:    s_waitcnt vmcnt(0)
1638; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
1639; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1640; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1641; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1642; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
1643; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1644; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
1645; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1646; GFX9-NEXT:    s_setpc_b64 s[30:31]
1647;
1648; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
1649; GFX8:       ; %bb.0:
1650; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1652; GFX8-NEXT:    s_waitcnt vmcnt(0)
1653; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1654; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1655; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1656; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1657; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1658; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1659; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1660; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1661; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1662; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1663; GFX8-NEXT:    s_setpc_b64 s[30:31]
1664;
1665; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
1666; GFX7:       ; %bb.0:
1667; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668; GFX7-NEXT:    s_mov_b32 s6, 0
1669; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1670; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1671; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1672; GFX7-NEXT:    s_waitcnt vmcnt(0)
1673; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
1674; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
1675; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
1676; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
1677; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1678; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1679; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1680; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
1681; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1682; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1683; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1684; GFX7-NEXT:    s_setpc_b64 s[30:31]
1685;
1686; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
1687; GFX10:       ; %bb.0:
1688; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1690; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1691; GFX10-NEXT:    s_waitcnt vmcnt(0)
1692; GFX10-NEXT:    v_mov_b32_e32 v1, 8
1693; GFX10-NEXT:    s_mov_b32 s4, 16
1694; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1695; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1696; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1697; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
1698; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
1699; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
1700; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1701; GFX10-NEXT:    s_setpc_b64 s[30:31]
1702  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1703  %element = extractelement <8 x i8> %vector, i32 2
1704  ret i8 %element
1705}
1706
; Constant-index extract of element 3 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; Every target rebuilds the first loaded dword (v0) from its bytes and then
; expects a trailing v_lshrrev_b32 by 24 so element 3 (the top byte of that
; dword) ends up in the low bits of v0.
1707define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) {
1708; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
1709; GFX9:       ; %bb.0:
1710; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1711; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1712; GFX9-NEXT:    v_mov_b32_e32 v2, 8
1713; GFX9-NEXT:    s_waitcnt vmcnt(0)
1714; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
1715; GFX9-NEXT:    v_mov_b32_e32 v3, 16
1716; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1717; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1718; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1719; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
1720; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1721; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1722; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1723; GFX9-NEXT:    s_setpc_b64 s[30:31]
1724;
1725; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
1726; GFX8:       ; %bb.0:
1727; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1728; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1729; GFX8-NEXT:    s_waitcnt vmcnt(0)
1730; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1731; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1732; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1733; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1734; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1735; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1736; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1737; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1738; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1739; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1740; GFX8-NEXT:    s_setpc_b64 s[30:31]
1741;
1742; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
1743; GFX7:       ; %bb.0:
1744; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745; GFX7-NEXT:    s_mov_b32 s6, 0
1746; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1747; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1748; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1749; GFX7-NEXT:    s_waitcnt vmcnt(0)
1750; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
1751; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
1752; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
1753; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
1754; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1755; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1756; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1757; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
1758; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1759; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
1760; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1761; GFX7-NEXT:    s_setpc_b64 s[30:31]
1762;
1763; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
1764; GFX10:       ; %bb.0:
1765; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1766; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1767; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1768; GFX10-NEXT:    s_waitcnt vmcnt(0)
1769; GFX10-NEXT:    v_mov_b32_e32 v1, 8
1770; GFX10-NEXT:    v_mov_b32_e32 v2, 16
1771; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1772; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1773; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1774; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
1775; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1776; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
1777; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
1778; GFX10-NEXT:    s_setpc_b64 s[30:31]
1779  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1780  %element = extractelement <8 x i8> %vector, i32 3
1781  ret i8 %element
1782}
1783
; Constant-index extract of element 4 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; Element 4 is the low byte of the second loaded dword, so every target
; re-packs v1 (not v0) into v0 and expects no trailing right shift before the
; s_setpc_b64 return.
1784define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) {
1785; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
1786; GFX9:       ; %bb.0:
1787; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1788; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1789; GFX9-NEXT:    v_mov_b32_e32 v2, 8
1790; GFX9-NEXT:    s_waitcnt vmcnt(0)
1791; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
1792; GFX9-NEXT:    v_mov_b32_e32 v3, 16
1793; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
1794; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1795; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1796; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
1797; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1798; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1799; GFX9-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
1802; GFX8:       ; %bb.0:
1803; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1805; GFX8-NEXT:    s_waitcnt vmcnt(0)
1806; GFX8-NEXT:    v_mov_b32_e32 v0, 8
1807; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1808; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1809; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1810; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1811; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1812; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1813; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1814; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1815; GFX8-NEXT:    s_setpc_b64 s[30:31]
1816;
1817; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
1818; GFX7:       ; %bb.0:
1819; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1820; GFX7-NEXT:    s_mov_b32 s6, 0
1821; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1822; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1823; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1824; GFX7-NEXT:    s_waitcnt vmcnt(0)
1825; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
1826; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
1827; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
1828; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
1829; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1830; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1831; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1832; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
1833; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1834; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
1835; GFX7-NEXT:    s_setpc_b64 s[30:31]
1836;
1837; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
1838; GFX10:       ; %bb.0:
1839; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1840; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1841; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1842; GFX10-NEXT:    s_waitcnt vmcnt(0)
1843; GFX10-NEXT:    v_mov_b32_e32 v0, 8
1844; GFX10-NEXT:    v_mov_b32_e32 v2, 16
1845; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1846; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1847; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1848; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
1849; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1850; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
1851; GFX10-NEXT:    s_setpc_b64 s[30:31]
1852  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1853  %element = extractelement <8 x i8> %vector, i32 4
1854  ret i8 %element
1855}
1856
; Constant-index extract of element 5 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; Element 5 is byte 1 of the second loaded dword, so every target re-packs
; v1 into v0 and expects a trailing v_lshrrev_b32 by 8. On GFX9 and GFX10
; the shift amount 8 used for the byte-1 SDWA shift is held in s4 rather
; than in a VGPR.
1857define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) {
1858; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
1859; GFX9:       ; %bb.0:
1860; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1861; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1862; GFX9-NEXT:    s_mov_b32 s4, 8
1863; GFX9-NEXT:    s_waitcnt vmcnt(0)
1864; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
1865; GFX9-NEXT:    v_mov_b32_e32 v2, 16
1866; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1867; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1868; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1869; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v4
1870; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1871; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
1872; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1873; GFX9-NEXT:    s_setpc_b64 s[30:31]
1874;
1875; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
1876; GFX8:       ; %bb.0:
1877; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1878; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1879; GFX8-NEXT:    s_waitcnt vmcnt(0)
1880; GFX8-NEXT:    v_mov_b32_e32 v0, 8
1881; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1882; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1883; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1884; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1885; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1886; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1887; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1888; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1889; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1890; GFX8-NEXT:    s_setpc_b64 s[30:31]
1891;
1892; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
1893; GFX7:       ; %bb.0:
1894; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1895; GFX7-NEXT:    s_mov_b32 s6, 0
1896; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1897; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1898; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1899; GFX7-NEXT:    s_waitcnt vmcnt(0)
1900; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
1901; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
1902; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
1903; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
1904; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1905; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1906; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1907; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
1908; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1909; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
1910; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1911; GFX7-NEXT:    s_setpc_b64 s[30:31]
1912;
1913; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
1914; GFX10:       ; %bb.0:
1915; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1916; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1917; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1918; GFX10-NEXT:    s_mov_b32 s4, 8
1919; GFX10-NEXT:    s_waitcnt vmcnt(0)
1920; GFX10-NEXT:    v_mov_b32_e32 v0, 16
1921; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1922; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1923; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1924; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
1925; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1926; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
1927; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
1928; GFX10-NEXT:    s_setpc_b64 s[30:31]
1929  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1930  %element = extractelement <8 x i8> %vector, i32 5
1931  ret i8 %element
1932}
1933
; Constant-index extract of element 6 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; Element 6 is byte 2 of the second loaded dword, so every target re-packs
; v1 into v0 and expects a trailing v_lshrrev_b32 by 16. On GFX9 and GFX10
; the shift amount 16 used for the byte-2 SDWA shift is held in s4 rather
; than in a VGPR.
1934define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) {
1935; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
1936; GFX9:       ; %bb.0:
1937; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1938; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1939; GFX9-NEXT:    v_mov_b32_e32 v2, 8
1940; GFX9-NEXT:    s_mov_b32 s4, 16
1941; GFX9-NEXT:    s_waitcnt vmcnt(0)
1942; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
1943; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1944; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1945; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1946; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
1947; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1948; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
1949; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1950; GFX9-NEXT:    s_setpc_b64 s[30:31]
1951;
1952; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
1953; GFX8:       ; %bb.0:
1954; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1955; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1956; GFX8-NEXT:    s_waitcnt vmcnt(0)
1957; GFX8-NEXT:    v_mov_b32_e32 v0, 8
1958; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1959; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1960; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1961; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1962; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1963; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
1964; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
1965; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1966; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1967; GFX8-NEXT:    s_setpc_b64 s[30:31]
1968;
1969; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
1970; GFX7:       ; %bb.0:
1971; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1972; GFX7-NEXT:    s_mov_b32 s6, 0
1973; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1974; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1975; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1976; GFX7-NEXT:    s_waitcnt vmcnt(0)
1977; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
1978; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
1979; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
1980; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
1981; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1982; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1983; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
1984; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
1985; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1986; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
1987; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1988; GFX7-NEXT:    s_setpc_b64 s[30:31]
1989;
1990; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
1991; GFX10:       ; %bb.0:
1992; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1993; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1994; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1995; GFX10-NEXT:    s_waitcnt vmcnt(0)
1996; GFX10-NEXT:    v_mov_b32_e32 v0, 8
1997; GFX10-NEXT:    s_mov_b32 s4, 16
1998; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1999; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
2000; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2001; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
2002; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2003; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
2004; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2005; GFX10-NEXT:    s_setpc_b64 s[30:31]
2006  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
2007  %element = extractelement <8 x i8> %vector, i32 6
2008  ret i8 %element
2009}
2010
; Constant-index extract of element 7 from an <8 x i8> vector loaded through
; an addrspace(1) pointer passed in v[0:1] (default calling convention).
; Element 7 is the top byte of the second loaded dword, so every target
; re-packs v1 into v0 and expects a trailing v_lshrrev_b32 by 24 before the
; s_setpc_b64 return.
2011define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) {
2012; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
2013; GFX9:       ; %bb.0:
2014; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2015; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
2016; GFX9-NEXT:    v_mov_b32_e32 v2, 8
2017; GFX9-NEXT:    s_waitcnt vmcnt(0)
2018; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
2019; GFX9-NEXT:    v_mov_b32_e32 v3, 16
2020; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
2021; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2022; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2023; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
2024; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2025; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2026; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2027; GFX9-NEXT:    s_setpc_b64 s[30:31]
2028;
2029; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
2030; GFX8:       ; %bb.0:
2031; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2032; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
2033; GFX8-NEXT:    s_waitcnt vmcnt(0)
2034; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2035; GFX8-NEXT:    v_mov_b32_e32 v2, 16
2036; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2037; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
2038; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2039; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2040; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2041; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2042; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2043; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2044; GFX8-NEXT:    s_setpc_b64 s[30:31]
2045;
2046; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
2047; GFX7:       ; %bb.0:
2048; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2049; GFX7-NEXT:    s_mov_b32 s6, 0
2050; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2051; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2052; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
2053; GFX7-NEXT:    s_waitcnt vmcnt(0)
2054; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
2055; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
2056; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
2057; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
2058; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
2059; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2060; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
2061; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
2062; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
2063; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
2064; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2065; GFX7-NEXT:    s_setpc_b64 s[30:31]
2066;
2067; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
2068; GFX10:       ; %bb.0:
2069; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2071; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
2072; GFX10-NEXT:    s_waitcnt vmcnt(0)
2073; GFX10-NEXT:    v_mov_b32_e32 v0, 8
2074; GFX10-NEXT:    v_mov_b32_e32 v2, 16
2075; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2076; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
2077; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2078; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
2079; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2080; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
2081; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
2082; GFX10-NEXT:    s_setpc_b64 s[30:31]
2083  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
2084  %element = extractelement <8 x i8> %vector, i32 7
2085  ret i8 %element
2086}
2087
; Test: dynamic-index extract from a <16 x i8> vector where both the pointer
; (addrspace(4)) and the index are passed inreg to an amdgpu_ps function.
; In the expected output the vector arrives through one s_load_dwordx4 and the
; index is in s4. The containing dword is chosen with (idx >> 2) through an
; s_cmp_eq_u32 / s_cselect_b32 chain, and the byte is then moved to the low
; bits by a right shift of ((idx & 3) << 3). The whole sequence stays on
; scalar instructions.
; The gfx900, fiji and hawaii runs share the common GCN check group here;
; gfx1010 has its own group (see the run lines at the top of the file).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2088define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
2089; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
2090; GCN:       ; %bb.0:
2091; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
2092; GCN-NEXT:    s_mov_b32 s11, 0x80008
2093; GCN-NEXT:    s_movk_i32 s9, 0xff
2094; GCN-NEXT:    s_waitcnt lgkmcnt(0)
2095; GCN-NEXT:    s_bfe_u32 s12, s0, s11
2096; GCN-NEXT:    s_and_b32 s10, s0, s9
2097; GCN-NEXT:    s_lshl_b32 s12, s12, 8
2098; GCN-NEXT:    s_or_b32 s10, s10, s12
2099; GCN-NEXT:    s_mov_b32 s12, 0x80010
2100; GCN-NEXT:    s_lshr_b32 s5, s0, 24
2101; GCN-NEXT:    s_bfe_u32 s0, s0, s12
2102; GCN-NEXT:    s_lshl_b32 s0, s0, 16
2103; GCN-NEXT:    s_or_b32 s0, s10, s0
2104; GCN-NEXT:    s_lshl_b32 s5, s5, 24
2105; GCN-NEXT:    s_bfe_u32 s10, s1, s11
2106; GCN-NEXT:    s_lshr_b32 s6, s1, 24
2107; GCN-NEXT:    s_or_b32 s0, s0, s5
2108; GCN-NEXT:    s_and_b32 s5, s1, s9
2109; GCN-NEXT:    s_lshl_b32 s10, s10, 8
2110; GCN-NEXT:    s_bfe_u32 s1, s1, s12
2111; GCN-NEXT:    s_or_b32 s5, s5, s10
2112; GCN-NEXT:    s_lshl_b32 s1, s1, 16
2113; GCN-NEXT:    s_or_b32 s1, s5, s1
2114; GCN-NEXT:    s_lshl_b32 s5, s6, 24
2115; GCN-NEXT:    s_bfe_u32 s6, s2, s11
2116; GCN-NEXT:    s_lshr_b32 s7, s2, 24
2117; GCN-NEXT:    s_or_b32 s1, s1, s5
2118; GCN-NEXT:    s_and_b32 s5, s2, s9
2119; GCN-NEXT:    s_lshl_b32 s6, s6, 8
2120; GCN-NEXT:    s_bfe_u32 s2, s2, s12
2121; GCN-NEXT:    s_or_b32 s5, s5, s6
2122; GCN-NEXT:    s_lshl_b32 s2, s2, 16
2123; GCN-NEXT:    s_or_b32 s2, s5, s2
2124; GCN-NEXT:    s_lshl_b32 s5, s7, 24
2125; GCN-NEXT:    s_bfe_u32 s6, s3, s11
2126; GCN-NEXT:    s_lshr_b32 s8, s3, 24
2127; GCN-NEXT:    s_or_b32 s2, s2, s5
2128; GCN-NEXT:    s_and_b32 s5, s3, s9
2129; GCN-NEXT:    s_lshl_b32 s6, s6, 8
2130; GCN-NEXT:    s_bfe_u32 s3, s3, s12
2131; GCN-NEXT:    s_or_b32 s5, s5, s6
2132; GCN-NEXT:    s_lshl_b32 s3, s3, 16
2133; GCN-NEXT:    s_or_b32 s3, s5, s3
2134; GCN-NEXT:    s_lshl_b32 s5, s8, 24
2135; GCN-NEXT:    s_or_b32 s3, s3, s5
2136; GCN-NEXT:    s_lshr_b32 s5, s4, 2
2137; GCN-NEXT:    s_cmp_eq_u32 s5, 1
2138; GCN-NEXT:    s_cselect_b32 s0, s1, s0
2139; GCN-NEXT:    s_cmp_eq_u32 s5, 2
2140; GCN-NEXT:    s_cselect_b32 s0, s2, s0
2141; GCN-NEXT:    s_cmp_eq_u32 s5, 3
2142; GCN-NEXT:    s_cselect_b32 s0, s3, s0
2143; GCN-NEXT:    s_and_b32 s1, s4, 3
2144; GCN-NEXT:    s_lshl_b32 s1, s1, 3
2145; GCN-NEXT:    s_lshr_b32 s0, s0, s1
2146; GCN-NEXT:    ; return to shader part epilog
2147;
2148; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
2149; GFX10:       ; %bb.0:
2150; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
2151; GFX10-NEXT:    s_mov_b32 s6, 0x80008
2152; GFX10-NEXT:    s_movk_i32 s5, 0xff
2153; GFX10-NEXT:    s_mov_b32 s7, 0x80010
2154; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2155; GFX10-NEXT:    s_bfe_u32 s13, s0, s6
2156; GFX10-NEXT:    s_lshr_b32 s8, s0, 24
2157; GFX10-NEXT:    s_and_b32 s12, s0, s5
2158; GFX10-NEXT:    s_bfe_u32 s0, s0, s7
2159; GFX10-NEXT:    s_lshl_b32 s13, s13, 8
2160; GFX10-NEXT:    s_bfe_u32 s15, s1, s6
2161; GFX10-NEXT:    s_bfe_u32 s17, s2, s6
2162; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
2163; GFX10-NEXT:    s_or_b32 s12, s12, s13
2164; GFX10-NEXT:    s_bfe_u32 s6, s3, s6
2165; GFX10-NEXT:    s_lshr_b32 s9, s1, 24
2166; GFX10-NEXT:    s_lshr_b32 s10, s2, 24
2167; GFX10-NEXT:    s_lshr_b32 s11, s3, 24
2168; GFX10-NEXT:    s_and_b32 s14, s1, s5
2169; GFX10-NEXT:    s_bfe_u32 s1, s1, s7
2170; GFX10-NEXT:    s_and_b32 s16, s2, s5
2171; GFX10-NEXT:    s_lshl_b32 s8, s8, 24
2172; GFX10-NEXT:    s_lshl_b32 s15, s15, 8
2173; GFX10-NEXT:    s_lshl_b32 s17, s17, 8
2174; GFX10-NEXT:    s_or_b32 s0, s12, s0
2175; GFX10-NEXT:    s_bfe_u32 s2, s2, s7
2176; GFX10-NEXT:    s_and_b32 s5, s3, s5
2177; GFX10-NEXT:    s_lshl_b32 s6, s6, 8
2178; GFX10-NEXT:    s_bfe_u32 s3, s3, s7
2179; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
2180; GFX10-NEXT:    s_or_b32 s13, s14, s15
2181; GFX10-NEXT:    s_or_b32 s0, s0, s8
2182; GFX10-NEXT:    s_or_b32 s8, s16, s17
2183; GFX10-NEXT:    s_lshl_b32 s2, s2, 16
2184; GFX10-NEXT:    s_or_b32 s5, s5, s6
2185; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
2186; GFX10-NEXT:    s_lshl_b32 s9, s9, 24
2187; GFX10-NEXT:    s_or_b32 s1, s13, s1
2188; GFX10-NEXT:    s_or_b32 s2, s8, s2
2189; GFX10-NEXT:    s_lshl_b32 s8, s10, 24
2190; GFX10-NEXT:    s_or_b32 s3, s5, s3
2191; GFX10-NEXT:    s_lshl_b32 s5, s11, 24
2192; GFX10-NEXT:    s_lshr_b32 s6, s4, 2
2193; GFX10-NEXT:    s_or_b32 s1, s1, s9
2194; GFX10-NEXT:    s_or_b32 s2, s2, s8
2195; GFX10-NEXT:    s_or_b32 s3, s3, s5
2196; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
2197; GFX10-NEXT:    s_cselect_b32 s0, s1, s0
2198; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
2199; GFX10-NEXT:    s_cselect_b32 s0, s2, s0
2200; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
2201; GFX10-NEXT:    s_cselect_b32 s0, s3, s0
2202; GFX10-NEXT:    s_and_b32 s1, s4, 3
2203; GFX10-NEXT:    s_lshl_b32 s1, s1, 3
2204; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
2205; GFX10-NEXT:    ; return to shader part epilog
; Input IR: load the whole vector, then extract at the runtime index %idx.
2206  %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
2207  %element = extractelement <16 x i8> %vector, i32 %idx
2208  ret i8 %element
2209}
2210
; Test: dynamic-index extract from a <16 x i8> vector loaded through a global
; (addrspace(1)) pointer, with only the index passed inreg to an amdgpu_ps
; function. In the expected output the address is in v[0:1], the index is in
; s2 and the vector is loaded into v[0:3]. The containing dword is chosen
; with (idx >> 2) through a v_cmp_eq_u32_e64 / v_cndmask_b32 chain, the byte
; is moved to the low bits by a right shift of ((idx & 3) << 3), and the
; result is copied to s0 with v_readfirstlane_b32 before the epilog.
; Each of the four targets has its own check group for this function.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2211define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
2212; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
2213; GFX9:       ; %bb.0:
2214; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2215; GFX9-NEXT:    s_mov_b32 s0, 8
2216; GFX9-NEXT:    s_mov_b32 s1, 16
2217; GFX9-NEXT:    s_movk_i32 s3, 0xff
2218; GFX9-NEXT:    v_mov_b32_e32 v5, 8
2219; GFX9-NEXT:    v_mov_b32_e32 v4, 0xff
2220; GFX9-NEXT:    v_mov_b32_e32 v6, 16
2221; GFX9-NEXT:    s_lshr_b32 s4, s2, 2
2222; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 1
2223; GFX9-NEXT:    s_and_b32 s2, s2, 3
2224; GFX9-NEXT:    s_waitcnt vmcnt(0)
2225; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v0
2226; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 24, v1
2227; GFX9-NEXT:    v_lshlrev_b32_sdwa v11, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2228; GFX9-NEXT:    v_lshlrev_b32_sdwa v13, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2229; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 24, v2
2230; GFX9-NEXT:    v_lshlrev_b32_sdwa v12, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2231; GFX9-NEXT:    v_lshlrev_b32_sdwa v14, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2232; GFX9-NEXT:    v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2233; GFX9-NEXT:    v_and_or_b32 v0, v0, s3, v11
2234; GFX9-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
2235; GFX9-NEXT:    v_and_or_b32 v1, v1, s3, v13
2236; GFX9-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
2237; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 24, v3
2238; GFX9-NEXT:    v_lshlrev_b32_sdwa v16, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2239; GFX9-NEXT:    v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2240; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v15
2241; GFX9-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
2242; GFX9-NEXT:    v_or3_b32 v0, v0, v12, v7
2243; GFX9-NEXT:    v_or3_b32 v1, v1, v14, v8
2244; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2245; GFX9-NEXT:    v_and_or_b32 v3, v3, v4, v5
2246; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 24, v10
2247; GFX9-NEXT:    v_or3_b32 v2, v2, v16, v9
2248; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2249; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 2
2250; GFX9-NEXT:    v_or3_b32 v3, v3, v6, v4
2251; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2252; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 3
2253; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2254; GFX9-NEXT:    s_lshl_b32 s0, s2, 3
2255; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
2256; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2257; GFX9-NEXT:    ; return to shader part epilog
2258;
2259; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
2260; GFX8:       ; %bb.0:
2261; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2262; GFX8-NEXT:    v_mov_b32_e32 v4, 8
2263; GFX8-NEXT:    v_mov_b32_e32 v5, 16
2264; GFX8-NEXT:    v_mov_b32_e32 v6, 8
2265; GFX8-NEXT:    v_mov_b32_e32 v7, 16
2266; GFX8-NEXT:    s_lshr_b32 s0, s2, 2
2267; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
2268; GFX8-NEXT:    s_and_b32 s1, s2, 3
2269; GFX8-NEXT:    s_waitcnt vmcnt(0)
2270; GFX8-NEXT:    v_lshlrev_b32_sdwa v12, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2271; GFX8-NEXT:    v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2272; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 24, v0
2273; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
2274; GFX8-NEXT:    v_lshlrev_b32_sdwa v13, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2275; GFX8-NEXT:    v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2276; GFX8-NEXT:    v_lshlrev_b32_sdwa v14, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2277; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2278; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2279; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 24, v2
2280; GFX8-NEXT:    v_lshlrev_b32_sdwa v15, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2281; GFX8-NEXT:    v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2282; GFX8-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
2283; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 24, v9
2284; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2285; GFX8-NEXT:    v_or_b32_e32 v0, v0, v13
2286; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
2287; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 24, v3
2288; GFX8-NEXT:    v_lshlrev_b32_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2289; GFX8-NEXT:    v_lshlrev_b32_e32 v9, 24, v10
2290; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2291; GFX8-NEXT:    v_or_b32_e32 v2, v2, v15
2292; GFX8-NEXT:    v_or_b32_e32 v0, v0, v8
2293; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
2294; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 24, v11
2295; GFX8-NEXT:    v_or_b32_e32 v3, v3, v7
2296; GFX8-NEXT:    v_or_b32_e32 v2, v2, v9
2297; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2298; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
2299; GFX8-NEXT:    v_or_b32_e32 v3, v3, v6
2300; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2301; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
2302; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2303; GFX8-NEXT:    s_lshl_b32 s0, s1, 3
2304; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
2305; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2306; GFX8-NEXT:    ; return to shader part epilog
2307;
2308; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
2309; GFX7:       ; %bb.0:
2310; GFX7-NEXT:    s_mov_b32 s6, 0
2311; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2312; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2313; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2314; GFX7-NEXT:    s_movk_i32 s0, 0xff
2315; GFX7-NEXT:    v_mov_b32_e32 v4, 0xff
2316; GFX7-NEXT:    s_lshr_b32 s1, s2, 2
2317; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 1
2318; GFX7-NEXT:    s_and_b32 s2, s2, 3
2319; GFX7-NEXT:    s_waitcnt vmcnt(0)
2320; GFX7-NEXT:    v_bfe_u32 v10, v0, 8, 8
2321; GFX7-NEXT:    v_bfe_u32 v12, v1, 8, 8
2322; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
2323; GFX7-NEXT:    v_lshrrev_b32_e32 v6, 24, v1
2324; GFX7-NEXT:    v_and_b32_e32 v9, s0, v0
2325; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
2326; GFX7-NEXT:    v_and_b32_e32 v11, s0, v1
2327; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
2328; GFX7-NEXT:    v_bfe_u32 v14, v2, 8, 8
2329; GFX7-NEXT:    v_lshlrev_b32_e32 v10, 8, v10
2330; GFX7-NEXT:    v_lshlrev_b32_e32 v12, 8, v12
2331; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v2
2332; GFX7-NEXT:    v_and_b32_e32 v13, v2, v4
2333; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
2334; GFX7-NEXT:    v_bfe_u32 v15, v3, 8, 8
2335; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2336; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2337; GFX7-NEXT:    v_lshlrev_b32_e32 v14, 8, v14
2338; GFX7-NEXT:    v_or_b32_e32 v9, v9, v10
2339; GFX7-NEXT:    v_or_b32_e32 v10, v11, v12
2340; GFX7-NEXT:    v_lshrrev_b32_e32 v8, 24, v3
2341; GFX7-NEXT:    v_and_b32_e32 v4, v3, v4
2342; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
2343; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
2344; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
2345; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2346; GFX7-NEXT:    v_lshlrev_b32_e32 v15, 8, v15
2347; GFX7-NEXT:    v_or_b32_e32 v11, v13, v14
2348; GFX7-NEXT:    v_or_b32_e32 v0, v9, v0
2349; GFX7-NEXT:    v_or_b32_e32 v1, v10, v1
2350; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
2351; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
2352; GFX7-NEXT:    v_or_b32_e32 v4, v4, v15
2353; GFX7-NEXT:    v_or_b32_e32 v2, v11, v2
2354; GFX7-NEXT:    v_or_b32_e32 v0, v0, v5
2355; GFX7-NEXT:    v_or_b32_e32 v1, v1, v6
2356; GFX7-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
2357; GFX7-NEXT:    v_or_b32_e32 v3, v4, v3
2358; GFX7-NEXT:    v_or_b32_e32 v2, v2, v7
2359; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2360; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 2
2361; GFX7-NEXT:    v_or_b32_e32 v3, v3, v8
2362; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2363; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 3
2364; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2365; GFX7-NEXT:    s_lshl_b32 s0, s2, 3
2366; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
2367; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
2368; GFX7-NEXT:    ; return to shader part epilog
2369;
2370; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
2371; GFX10:       ; %bb.0:
2372; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2373; GFX10-NEXT:    s_mov_b32 s0, 8
2374; GFX10-NEXT:    v_mov_b32_e32 v5, 8
2375; GFX10-NEXT:    s_mov_b32 s1, 16
2376; GFX10-NEXT:    s_movk_i32 s3, 0xff
2377; GFX10-NEXT:    v_mov_b32_e32 v4, 0xff
2378; GFX10-NEXT:    v_mov_b32_e32 v6, 16
2379; GFX10-NEXT:    s_waitcnt vmcnt(0)
2380; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 24, v0
2381; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 24, v1
2382; GFX10-NEXT:    v_lshlrev_b32_sdwa v10, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2383; GFX10-NEXT:    v_lshlrev_b32_sdwa v12, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2384; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 24, v2
2385; GFX10-NEXT:    v_lshlrev_b32_sdwa v11, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2386; GFX10-NEXT:    v_lshlrev_b32_sdwa v13, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2387; GFX10-NEXT:    v_lshlrev_b32_sdwa v14, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2388; GFX10-NEXT:    v_and_or_b32 v0, v0, s3, v10
2389; GFX10-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
2390; GFX10-NEXT:    v_and_or_b32 v1, v1, s3, v12
2391; GFX10-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
2392; GFX10-NEXT:    s_lshr_b32 s0, s2, 2
2393; GFX10-NEXT:    v_lshlrev_b32_sdwa v15, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2394; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, v14
2395; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
2396; GFX10-NEXT:    v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2397; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 24, v3
2398; GFX10-NEXT:    v_or3_b32 v0, v0, v11, v7
2399; GFX10-NEXT:    v_or3_b32 v1, v1, v13, v8
2400; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 1
2401; GFX10-NEXT:    v_or3_b32 v2, v2, v15, v9
2402; GFX10-NEXT:    v_and_or_b32 v4, v3, v4, v5
2403; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2404; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 24, v10
2405; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2406; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 2
2407; GFX10-NEXT:    v_or3_b32 v1, v4, v3, v5
2408; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2409; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 3
2410; GFX10-NEXT:    s_and_b32 s0, s2, 3
2411; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
2412; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2413; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
2414; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2415; GFX10-NEXT:    ; return to shader part epilog
; Input IR: load the whole vector, then extract at the runtime index %idx.
2416  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2417  %element = extractelement <16 x i8> %vector, i32 %idx
2418  ret i8 %element
2419}
2420
; Test: dynamic-index extract from a <16 x i8> vector loaded through a global
; (addrspace(1)) pointer, with neither argument marked inreg and the default
; calling convention. In the expected output the address is in v[0:1], the
; index is in v2 and the vector is loaded into v[3:6]. The containing dword
; is chosen with (idx >> 2) through a v_cmp_eq_u32_e32 / v_cndmask_b32 chain,
; the byte is moved to the low bits by a right shift of ((idx & 3) << 3), and
; the result is left in v0 before s_setpc_b64 returns.
; Each of the four targets has its own check group for this function.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2421define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) {
2422; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
2423; GFX9:       ; %bb.0:
2424; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2425; GFX9-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
2426; GFX9-NEXT:    s_mov_b32 s4, 8
2427; GFX9-NEXT:    s_mov_b32 s5, 16
2428; GFX9-NEXT:    s_movk_i32 s6, 0xff
2429; GFX9-NEXT:    v_mov_b32_e32 v1, 8
2430; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
2431; GFX9-NEXT:    v_mov_b32_e32 v7, 16
2432; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 2, v2
2433; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
2434; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
2435; GFX9-NEXT:    s_waitcnt vmcnt(0)
2436; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 24, v3
2437; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 24, v4
2438; GFX9-NEXT:    v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2439; GFX9-NEXT:    v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2440; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v5
2441; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 24, v6
2442; GFX9-NEXT:    v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2443; GFX9-NEXT:    v_lshlrev_b32_sdwa v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2444; GFX9-NEXT:    v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2445; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2446; GFX9-NEXT:    v_and_or_b32 v3, v3, s6, v13
2447; GFX9-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
2448; GFX9-NEXT:    v_and_or_b32 v4, v4, s6, v15
2449; GFX9-NEXT:    v_lshlrev_b32_e32 v10, 24, v10
2450; GFX9-NEXT:    v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2451; GFX9-NEXT:    v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2452; GFX9-NEXT:    v_and_or_b32 v5, v5, v0, v17
2453; GFX9-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
2454; GFX9-NEXT:    v_and_or_b32 v0, v6, v0, v1
2455; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v12
2456; GFX9-NEXT:    v_or3_b32 v3, v3, v14, v9
2457; GFX9-NEXT:    v_or3_b32 v4, v4, v16, v10
2458; GFX9-NEXT:    v_or3_b32 v5, v5, v18, v11
2459; GFX9-NEXT:    v_or3_b32 v0, v0, v7, v1
2460; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
2461; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
2462; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2463; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
2464; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2465; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
2466; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
2467; GFX9-NEXT:    s_setpc_b64 s[30:31]
2468;
2469; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
2470; GFX8:       ; %bb.0:
2471; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2472; GFX8-NEXT:    flat_load_dwordx4 v[3:6], v[0:1]
2473; GFX8-NEXT:    v_mov_b32_e32 v0, 8
2474; GFX8-NEXT:    v_mov_b32_e32 v1, 16
2475; GFX8-NEXT:    v_mov_b32_e32 v7, 8
2476; GFX8-NEXT:    v_mov_b32_e32 v8, 16
2477; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 2, v2
2478; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
2479; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
2480; GFX8-NEXT:    s_waitcnt vmcnt(0)
2481; GFX8-NEXT:    v_lshlrev_b32_sdwa v14, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2482; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2483; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 24, v3
2484; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
2485; GFX8-NEXT:    v_lshlrev_b32_sdwa v15, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2486; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2487; GFX8-NEXT:    v_lshlrev_b32_sdwa v16, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2488; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2489; GFX8-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2490; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 24, v5
2491; GFX8-NEXT:    v_lshlrev_b32_sdwa v17, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2492; GFX8-NEXT:    v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2493; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 24, v10
2494; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 24, v11
2495; GFX8-NEXT:    v_or_b32_sdwa v5, v5, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2496; GFX8-NEXT:    v_or_b32_e32 v3, v3, v15
2497; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2498; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 24, v6
2499; GFX8-NEXT:    v_lshlrev_b32_sdwa v8, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2500; GFX8-NEXT:    v_lshlrev_b32_e32 v11, 24, v12
2501; GFX8-NEXT:    v_or_b32_sdwa v6, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2502; GFX8-NEXT:    v_or_b32_e32 v1, v5, v17
2503; GFX8-NEXT:    v_or_b32_e32 v3, v3, v10
2504; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
2505; GFX8-NEXT:    v_lshlrev_b32_e32 v7, 24, v13
2506; GFX8-NEXT:    v_or_b32_e32 v5, v6, v8
2507; GFX8-NEXT:    v_or_b32_e32 v1, v1, v11
2508; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
2509; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
2510; GFX8-NEXT:    v_or_b32_e32 v4, v5, v7
2511; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2512; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
2513; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2514; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
2515; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
2516; GFX8-NEXT:    s_setpc_b64 s[30:31]
2517;
2518; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
2519; GFX7:       ; %bb.0:
2520; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2521; GFX7-NEXT:    s_mov_b32 s6, 0
2522; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2523; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2524; GFX7-NEXT:    buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64
2525; GFX7-NEXT:    s_movk_i32 s4, 0xff
2526; GFX7-NEXT:    v_mov_b32_e32 v0, 0xff
2527; GFX7-NEXT:    v_lshrrev_b32_e32 v17, 2, v2
2528; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v17
2529; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
2530; GFX7-NEXT:    s_waitcnt vmcnt(0)
2531; GFX7-NEXT:    v_bfe_u32 v11, v3, 8, 8
2532; GFX7-NEXT:    v_bfe_u32 v13, v4, 8, 8
2533; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
2534; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v4
2535; GFX7-NEXT:    v_and_b32_e32 v10, s4, v3
2536; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
2537; GFX7-NEXT:    v_and_b32_e32 v12, s4, v4
2538; GFX7-NEXT:    v_bfe_u32 v4, v4, 16, 8
2539; GFX7-NEXT:    v_bfe_u32 v15, v5, 8, 8
2540; GFX7-NEXT:    v_lshlrev_b32_e32 v11, 8, v11
2541; GFX7-NEXT:    v_lshlrev_b32_e32 v13, 8, v13
2542; GFX7-NEXT:    v_lshrrev_b32_e32 v8, 24, v5
2543; GFX7-NEXT:    v_and_b32_e32 v14, v5, v0
2544; GFX7-NEXT:    v_bfe_u32 v5, v5, 16, 8
2545; GFX7-NEXT:    v_bfe_u32 v16, v6, 8, 8
2546; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
2547; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
2548; GFX7-NEXT:    v_lshlrev_b32_e32 v15, 8, v15
2549; GFX7-NEXT:    v_or_b32_e32 v10, v10, v11
2550; GFX7-NEXT:    v_or_b32_e32 v11, v12, v13
2551; GFX7-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
2552; GFX7-NEXT:    v_and_b32_e32 v0, v6, v0
2553; GFX7-NEXT:    v_bfe_u32 v6, v6, 16, 8
2554; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2555; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
2556; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
2557; GFX7-NEXT:    v_lshlrev_b32_e32 v16, 8, v16
2558; GFX7-NEXT:    v_or_b32_e32 v12, v14, v15
2559; GFX7-NEXT:    v_or_b32_e32 v3, v10, v3
2560; GFX7-NEXT:    v_or_b32_e32 v4, v11, v4
2561; GFX7-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
2562; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
2563; GFX7-NEXT:    v_or_b32_e32 v0, v0, v16
2564; GFX7-NEXT:    v_or_b32_e32 v5, v12, v5
2565; GFX7-NEXT:    v_or_b32_e32 v1, v3, v1
2566; GFX7-NEXT:    v_or_b32_e32 v3, v4, v7
2567; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
2568; GFX7-NEXT:    v_or_b32_e32 v0, v0, v6
2569; GFX7-NEXT:    v_or_b32_e32 v4, v5, v8
2570; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2571; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v17
2572; GFX7-NEXT:    v_or_b32_e32 v0, v0, v9
2573; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2574; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v17
2575; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2576; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
2577; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
2578; GFX7-NEXT:    s_setpc_b64 s[30:31]
2579;
2580; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
2581; GFX10:       ; %bb.0:
2582; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2583; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2584; GFX10-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
2585; GFX10-NEXT:    s_mov_b32 s4, 8
2586; GFX10-NEXT:    v_mov_b32_e32 v1, 8
2587; GFX10-NEXT:    s_mov_b32 s5, 16
2588; GFX10-NEXT:    s_movk_i32 s6, 0xff
2589; GFX10-NEXT:    v_mov_b32_e32 v0, 0xff
2590; GFX10-NEXT:    v_mov_b32_e32 v7, 16
2591; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 2, v2
2592; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
2593; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
2594; GFX10-NEXT:    s_waitcnt vmcnt(0)
2595; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 24, v3
2596; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 24, v4
2597; GFX10-NEXT:    v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2598; GFX10-NEXT:    v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2599; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 24, v5
2600; GFX10-NEXT:    v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2601; GFX10-NEXT:    v_lshlrev_b32_sdwa v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2602; GFX10-NEXT:    v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2603; GFX10-NEXT:    v_and_or_b32 v3, v3, s6, v13
2604; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
2605; GFX10-NEXT:    v_and_or_b32 v4, v4, s6, v15
2606; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 24, v10
2607; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 24, v6
2608; GFX10-NEXT:    v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2609; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2610; GFX10-NEXT:    v_and_or_b32 v5, v5, v0, v17
2611; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
2612; GFX10-NEXT:    v_or3_b32 v3, v3, v14, v9
2613; GFX10-NEXT:    v_or3_b32 v4, v4, v16, v10
2614; GFX10-NEXT:    v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2615; GFX10-NEXT:    v_and_or_b32 v0, v6, v0, v1
2616; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v12
2617; GFX10-NEXT:    v_or3_b32 v5, v5, v18, v11
2618; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc_lo
2619; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
2620; GFX10-NEXT:    v_or3_b32 v0, v0, v7, v1
2621; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc_lo
2622; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
2623; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
2624; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
2625; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
2626; GFX10-NEXT:    s_setpc_b64 s[30:31]
; Input IR: load the whole vector, then extract at the runtime index %idx.
2627  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2628  %element = extractelement <16 x i8> %vector, i32 %idx
2629  ret i8 %element
2630}
2631
; Dynamic-index extract from a <16 x i8> loaded through an inreg addrspace(4)
; pointer, with the index passed as a plain (non-inreg) i32 argument.
; The expected code reads the vector with a scalar s_load_dwordx4, rebuilds
; each of the four dwords from its bytes, picks one dword with a v_cndmask
; chain keyed on idx >> 2 (compared against 1, 2 and 3), and shifts it right
; by (idx & 3) << 3 bits. The result is copied to s0 with v_readfirstlane.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
2632define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
2633; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
2634; GCN:       ; %bb.0:
2635; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
2636; GCN-NEXT:    s_mov_b32 s10, 0x80008
2637; GCN-NEXT:    s_movk_i32 s8, 0xff
2638; GCN-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
2639; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2640; GCN-NEXT:    s_waitcnt lgkmcnt(0)
2641; GCN-NEXT:    s_bfe_u32 s11, s0, s10
2642; GCN-NEXT:    s_and_b32 s9, s0, s8
2643; GCN-NEXT:    s_lshl_b32 s11, s11, 8
2644; GCN-NEXT:    s_or_b32 s9, s9, s11
2645; GCN-NEXT:    s_mov_b32 s11, 0x80010
2646; GCN-NEXT:    s_lshr_b32 s4, s0, 24
2647; GCN-NEXT:    s_bfe_u32 s0, s0, s11
2648; GCN-NEXT:    s_lshl_b32 s0, s0, 16
2649; GCN-NEXT:    s_or_b32 s0, s9, s0
2650; GCN-NEXT:    s_lshl_b32 s4, s4, 24
2651; GCN-NEXT:    s_bfe_u32 s9, s1, s10
2652; GCN-NEXT:    s_lshr_b32 s5, s1, 24
2653; GCN-NEXT:    s_or_b32 s0, s0, s4
2654; GCN-NEXT:    s_and_b32 s4, s1, s8
2655; GCN-NEXT:    s_lshl_b32 s9, s9, 8
2656; GCN-NEXT:    s_bfe_u32 s1, s1, s11
2657; GCN-NEXT:    s_or_b32 s4, s4, s9
2658; GCN-NEXT:    s_lshl_b32 s1, s1, 16
2659; GCN-NEXT:    s_or_b32 s1, s4, s1
2660; GCN-NEXT:    s_lshl_b32 s4, s5, 24
2661; GCN-NEXT:    s_bfe_u32 s5, s2, s10
2662; GCN-NEXT:    s_lshr_b32 s6, s2, 24
2663; GCN-NEXT:    s_or_b32 s1, s1, s4
2664; GCN-NEXT:    s_and_b32 s4, s2, s8
2665; GCN-NEXT:    s_lshl_b32 s5, s5, 8
2666; GCN-NEXT:    s_bfe_u32 s2, s2, s11
2667; GCN-NEXT:    s_or_b32 s4, s4, s5
2668; GCN-NEXT:    s_lshl_b32 s2, s2, 16
2669; GCN-NEXT:    s_or_b32 s2, s4, s2
2670; GCN-NEXT:    s_lshl_b32 s4, s6, 24
2671; GCN-NEXT:    s_bfe_u32 s5, s3, s10
2672; GCN-NEXT:    s_lshr_b32 s7, s3, 24
2673; GCN-NEXT:    s_or_b32 s2, s2, s4
2674; GCN-NEXT:    s_and_b32 s4, s3, s8
2675; GCN-NEXT:    s_lshl_b32 s5, s5, 8
2676; GCN-NEXT:    s_bfe_u32 s3, s3, s11
2677; GCN-NEXT:    s_or_b32 s4, s4, s5
2678; GCN-NEXT:    s_lshl_b32 s3, s3, 16
2679; GCN-NEXT:    s_or_b32 s3, s4, s3
2680; GCN-NEXT:    s_lshl_b32 s4, s7, 24
2681; GCN-NEXT:    v_mov_b32_e32 v2, s0
2682; GCN-NEXT:    v_mov_b32_e32 v3, s1
2683; GCN-NEXT:    s_or_b32 s3, s3, s4
2684; GCN-NEXT:    v_mov_b32_e32 v4, s2
2685; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2686; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
2687; GCN-NEXT:    v_mov_b32_e32 v5, s3
2688; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2689; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
2690; GCN-NEXT:    v_and_b32_e32 v0, 3, v0
2691; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
2692; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
2693; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
2694; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2695; GCN-NEXT:    ; return to shader part epilog
2696;
2697; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
2698; GFX10:       ; %bb.0:
2699; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
2700; GFX10-NEXT:    s_mov_b32 s5, 0x80008
2701; GFX10-NEXT:    s_movk_i32 s4, 0xff
2702; GFX10-NEXT:    s_mov_b32 s6, 0x80010
2703; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
2704; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
2705; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
2706; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
2707; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2708; GFX10-NEXT:    s_bfe_u32 s12, s0, s5
2709; GFX10-NEXT:    s_bfe_u32 s14, s1, s5
2710; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
2711; GFX10-NEXT:    s_and_b32 s11, s0, s4
2712; GFX10-NEXT:    s_and_b32 s13, s1, s4
2713; GFX10-NEXT:    s_bfe_u32 s1, s1, s6
2714; GFX10-NEXT:    s_lshl_b32 s12, s12, 8
2715; GFX10-NEXT:    s_lshl_b32 s14, s14, 8
2716; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
2717; GFX10-NEXT:    s_or_b32 s11, s11, s12
2718; GFX10-NEXT:    s_or_b32 s12, s13, s14
2719; GFX10-NEXT:    s_lshl_b32 s8, s8, 24
2720; GFX10-NEXT:    s_or_b32 s1, s12, s1
2721; GFX10-NEXT:    s_lshr_b32 s7, s0, 24
2722; GFX10-NEXT:    s_bfe_u32 s0, s0, s6
2723; GFX10-NEXT:    s_or_b32 s1, s1, s8
2724; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
2725; GFX10-NEXT:    s_bfe_u32 s16, s2, s5
2726; GFX10-NEXT:    v_mov_b32_e32 v2, s1
2727; GFX10-NEXT:    s_lshl_b32 s7, s7, 24
2728; GFX10-NEXT:    s_or_b32 s0, s11, s0
2729; GFX10-NEXT:    s_lshr_b32 s9, s2, 24
2730; GFX10-NEXT:    s_and_b32 s15, s2, s4
2731; GFX10-NEXT:    s_lshl_b32 s16, s16, 8
2732; GFX10-NEXT:    s_bfe_u32 s2, s2, s6
2733; GFX10-NEXT:    s_or_b32 s0, s0, s7
2734; GFX10-NEXT:    s_or_b32 s7, s15, s16
2735; GFX10-NEXT:    s_lshl_b32 s2, s2, 16
2736; GFX10-NEXT:    s_bfe_u32 s5, s3, s5
2737; GFX10-NEXT:    v_cndmask_b32_e32 v2, s0, v2, vcc_lo
2738; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
2739; GFX10-NEXT:    s_or_b32 s2, s7, s2
2740; GFX10-NEXT:    s_lshl_b32 s7, s9, 24
2741; GFX10-NEXT:    s_and_b32 s4, s3, s4
2742; GFX10-NEXT:    s_lshl_b32 s5, s5, 8
2743; GFX10-NEXT:    s_bfe_u32 s1, s3, s6
2744; GFX10-NEXT:    s_or_b32 s2, s2, s7
2745; GFX10-NEXT:    s_lshr_b32 s10, s3, 24
2746; GFX10-NEXT:    s_or_b32 s3, s4, s5
2747; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
2748; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s2, vcc_lo
2749; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
2750; GFX10-NEXT:    s_or_b32 s0, s3, s1
2751; GFX10-NEXT:    s_lshl_b32 s1, s10, 24
2752; GFX10-NEXT:    s_or_b32 s3, s0, s1
2753; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2754; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
2755; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2756; GFX10-NEXT:    ; return to shader part epilog
2757  %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
2758  %element = extractelement <16 x i8> %vector, i32 %idx
2759  ret i8 %element
2760}
2761
; Constant-index extract of element 0 from a <16 x i8> loaded through an
; addrspace(1) pointer. On every tested target the expected code loads all
; four dwords, rebuilds dword 0 (v0) from its individual bytes and returns it
; in v0; no final right shift is emitted for this index.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
2762define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) {
2763; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
2764; GFX9:       ; %bb.0:
2765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2766; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2767; GFX9-NEXT:    s_waitcnt vmcnt(0)
2768; GFX9-NEXT:    v_mov_b32_e32 v2, 8
2769; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
2770; GFX9-NEXT:    v_mov_b32_e32 v3, 16
2771; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
2772; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2773; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2774; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
2775; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
2776; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
2777; GFX9-NEXT:    s_setpc_b64 s[30:31]
2778;
2779; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
2780; GFX8:       ; %bb.0:
2781; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2782; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2783; GFX8-NEXT:    s_waitcnt vmcnt(0)
2784; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2785; GFX8-NEXT:    v_mov_b32_e32 v2, 16
2786; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2787; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2788; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2789; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2790; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2791; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2792; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2793; GFX8-NEXT:    s_setpc_b64 s[30:31]
2794;
2795; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
2796; GFX7:       ; %bb.0:
2797; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2798; GFX7-NEXT:    s_mov_b32 s6, 0
2799; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2800; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2801; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2802; GFX7-NEXT:    s_waitcnt vmcnt(0)
2803; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
2804; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
2805; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
2806; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
2807; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
2808; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2809; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
2810; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2811; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2812; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2813; GFX7-NEXT:    s_setpc_b64 s[30:31]
2814;
2815; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
2816; GFX10:       ; %bb.0:
2817; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2818; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2819; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2820; GFX10-NEXT:    s_waitcnt vmcnt(0)
2821; GFX10-NEXT:    v_mov_b32_e32 v1, 8
2822; GFX10-NEXT:    v_mov_b32_e32 v2, 16
2823; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2824; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2825; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2826; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
2827; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2828; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
2829; GFX10-NEXT:    s_setpc_b64 s[30:31]
2830  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2831  %element = extractelement <16 x i8> %vector, i32 0
2832  ret i8 %element
2833}
2834
; Constant-index extract of element 1 from a <16 x i8> loaded through an
; addrspace(1) pointer. On every tested target the expected code loads all
; four dwords, rebuilds dword 0 (v0) from its individual bytes, and then
; shifts it right by 8 bits so the requested byte ends up in the low bits.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
2835define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) {
2836; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
2837; GFX9:       ; %bb.0:
2838; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2839; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2840; GFX9-NEXT:    s_mov_b32 s4, 8
2841; GFX9-NEXT:    s_waitcnt vmcnt(0)
2842; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
2843; GFX9-NEXT:    v_mov_b32_e32 v2, 16
2844; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2845; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2846; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2847; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
2848; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2849; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
2850; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2851; GFX9-NEXT:    s_setpc_b64 s[30:31]
2852;
2853; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
2854; GFX8:       ; %bb.0:
2855; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2857; GFX8-NEXT:    s_waitcnt vmcnt(0)
2858; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2859; GFX8-NEXT:    v_mov_b32_e32 v2, 16
2860; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2861; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2862; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2863; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2864; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2865; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2866; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2867; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2868; GFX8-NEXT:    s_setpc_b64 s[30:31]
2869;
2870; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
2871; GFX7:       ; %bb.0:
2872; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873; GFX7-NEXT:    s_mov_b32 s6, 0
2874; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2875; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2876; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2877; GFX7-NEXT:    s_waitcnt vmcnt(0)
2878; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
2879; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
2880; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
2881; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
2882; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
2883; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2884; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
2885; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2886; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2887; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2888; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2889; GFX7-NEXT:    s_setpc_b64 s[30:31]
2890;
2891; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
2892; GFX10:       ; %bb.0:
2893; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2894; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2895; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2896; GFX10-NEXT:    s_mov_b32 s4, 8
2897; GFX10-NEXT:    s_waitcnt vmcnt(0)
2898; GFX10-NEXT:    v_mov_b32_e32 v1, 16
2899; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2900; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2901; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2902; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
2903; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
2904; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
2905; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
2906; GFX10-NEXT:    s_setpc_b64 s[30:31]
2907  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2908  %element = extractelement <16 x i8> %vector, i32 1
2909  ret i8 %element
2910}
2911
; Constant-index extract of element 2 from a <16 x i8> loaded through an
; addrspace(1) pointer. On every tested target the expected code loads all
; four dwords, rebuilds dword 0 (v0) from its individual bytes, and then
; shifts it right by 16 bits so the requested byte ends up in the low bits.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
2912define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) {
2913; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
2914; GFX9:       ; %bb.0:
2915; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2916; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2917; GFX9-NEXT:    s_waitcnt vmcnt(0)
2918; GFX9-NEXT:    v_mov_b32_e32 v2, 8
2919; GFX9-NEXT:    s_mov_b32 s4, 16
2920; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
2921; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2922; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2923; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2924; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
2925; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2926; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
2927; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2928; GFX9-NEXT:    s_setpc_b64 s[30:31]
2929;
2930; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
2931; GFX8:       ; %bb.0:
2932; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2933; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
2934; GFX8-NEXT:    s_waitcnt vmcnt(0)
2935; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2936; GFX8-NEXT:    v_mov_b32_e32 v2, 16
2937; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2938; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2939; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2940; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2941; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2942; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2943; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2944; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2945; GFX8-NEXT:    s_setpc_b64 s[30:31]
2946;
2947; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
2948; GFX7:       ; %bb.0:
2949; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2950; GFX7-NEXT:    s_mov_b32 s6, 0
2951; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2952; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2953; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
2954; GFX7-NEXT:    s_waitcnt vmcnt(0)
2955; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
2956; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
2957; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
2958; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
2959; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
2960; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2961; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
2962; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2963; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2964; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2965; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2966; GFX7-NEXT:    s_setpc_b64 s[30:31]
2967;
2968; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
2969; GFX10:       ; %bb.0:
2970; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2971; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2972; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2973; GFX10-NEXT:    s_waitcnt vmcnt(0)
2974; GFX10-NEXT:    v_mov_b32_e32 v1, 8
2975; GFX10-NEXT:    s_mov_b32 s4, 16
2976; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2977; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
2978; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2979; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
2980; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
2981; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
2982; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2983; GFX10-NEXT:    s_setpc_b64 s[30:31]
2984  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
2985  %element = extractelement <16 x i8> %vector, i32 2
2986  ret i8 %element
2987}
2988
; Constant-index extract of element 3 from a <16 x i8> loaded through an
; addrspace(1) pointer. On every tested target the expected code loads all
; four dwords, rebuilds dword 0 (v0) from its individual bytes, and then
; shifts it right by 24 bits so the requested byte ends up in the low bits.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
2989define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) {
2990; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
2991; GFX9:       ; %bb.0:
2992; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2993; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
2994; GFX9-NEXT:    s_waitcnt vmcnt(0)
2995; GFX9-NEXT:    v_mov_b32_e32 v2, 8
2996; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
2997; GFX9-NEXT:    v_mov_b32_e32 v3, 16
2998; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
2999; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3000; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3001; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
3002; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3003; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
3004; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3005; GFX9-NEXT:    s_setpc_b64 s[30:31]
3006;
3007; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
3008; GFX8:       ; %bb.0:
3009; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3010; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3011; GFX8-NEXT:    s_waitcnt vmcnt(0)
3012; GFX8-NEXT:    v_mov_b32_e32 v1, 8
3013; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3014; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3015; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3016; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3017; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3018; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3019; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3020; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3021; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3022; GFX8-NEXT:    s_setpc_b64 s[30:31]
3023;
3024; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
3025; GFX7:       ; %bb.0:
3026; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3027; GFX7-NEXT:    s_mov_b32 s6, 0
3028; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3029; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3030; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3031; GFX7-NEXT:    s_waitcnt vmcnt(0)
3032; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
3033; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
3034; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
3035; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
3036; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3037; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
3038; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3039; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
3040; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3041; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3042; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3043; GFX7-NEXT:    s_setpc_b64 s[30:31]
3044;
3045; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
3046; GFX10:       ; %bb.0:
3047; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3048; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3049; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3050; GFX10-NEXT:    s_waitcnt vmcnt(0)
3051; GFX10-NEXT:    v_mov_b32_e32 v1, 8
3052; GFX10-NEXT:    v_mov_b32_e32 v2, 16
3053; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3054; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3055; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3056; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
3057; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3058; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
3059; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3060; GFX10-NEXT:    s_setpc_b64 s[30:31]
3061  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3062  %element = extractelement <16 x i8> %vector, i32 3
3063  ret i8 %element
3064}
3065
; Constant-index extract of element 4 from a <16 x i8> loaded through an
; addrspace(1) pointer. On every tested target the expected code loads all
; four dwords, rebuilds dword 1 (v1) from its individual bytes and returns it
; in v0; no final right shift is emitted for this index.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
3066define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) {
3067; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
3068; GFX9:       ; %bb.0:
3069; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3070; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3071; GFX9-NEXT:    s_waitcnt vmcnt(0)
3072; GFX9-NEXT:    v_mov_b32_e32 v2, 8
3073; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3074; GFX9-NEXT:    v_mov_b32_e32 v3, 16
3075; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
3076; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3077; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3078; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
3079; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3080; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
3081; GFX9-NEXT:    s_setpc_b64 s[30:31]
3082;
3083; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
3084; GFX8:       ; %bb.0:
3085; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3086; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3087; GFX8-NEXT:    s_waitcnt vmcnt(0)
3088; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3089; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3090; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3091; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3092; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3093; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3094; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3095; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3096; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3097; GFX8-NEXT:    s_setpc_b64 s[30:31]
3098;
3099; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
3100; GFX7:       ; %bb.0:
3101; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3102; GFX7-NEXT:    s_mov_b32 s6, 0
3103; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3104; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3105; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3106; GFX7-NEXT:    s_waitcnt vmcnt(0)
3107; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
3108; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
3109; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
3110; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
3111; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3112; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3113; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3114; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
3115; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3116; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3117; GFX7-NEXT:    s_setpc_b64 s[30:31]
3118;
3119; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
3120; GFX10:       ; %bb.0:
3121; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3122; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3123; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3124; GFX10-NEXT:    s_waitcnt vmcnt(0)
3125; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3126; GFX10-NEXT:    v_mov_b32_e32 v2, 16
3127; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3128; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3129; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3130; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
3131; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3132; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
3133; GFX10-NEXT:    s_setpc_b64 s[30:31]
3134  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3135  %element = extractelement <16 x i8> %vector, i32 4
3136  ret i8 %element
3137}
3138
; Constant-index extract of element 5 from a <16 x i8> loaded through an
; addrspace(1) pointer. On every tested target the expected code loads all
; four dwords, rebuilds dword 1 (v1) from its individual bytes, and then
; shifts it right by 8 bits so the requested byte ends up in the low bits.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
3139define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) {
3140; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
3141; GFX9:       ; %bb.0:
3142; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3143; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3144; GFX9-NEXT:    s_mov_b32 s4, 8
3145; GFX9-NEXT:    s_waitcnt vmcnt(0)
3146; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3147; GFX9-NEXT:    v_mov_b32_e32 v2, 16
3148; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3149; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3150; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3151; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v4
3152; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3153; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
3154; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3155; GFX9-NEXT:    s_setpc_b64 s[30:31]
3156;
3157; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
3158; GFX8:       ; %bb.0:
3159; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3160; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3161; GFX8-NEXT:    s_waitcnt vmcnt(0)
3162; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3163; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3164; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3165; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3166; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3167; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3168; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3169; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3170; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3171; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3172; GFX8-NEXT:    s_setpc_b64 s[30:31]
3173;
3174; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
3175; GFX7:       ; %bb.0:
3176; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3177; GFX7-NEXT:    s_mov_b32 s6, 0
3178; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3179; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3180; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3181; GFX7-NEXT:    s_waitcnt vmcnt(0)
3182; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
3183; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
3184; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
3185; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
3186; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3187; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3188; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3189; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
3190; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3191; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3192; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3193; GFX7-NEXT:    s_setpc_b64 s[30:31]
3194;
3195; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
3196; GFX10:       ; %bb.0:
3197; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3198; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3199; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3200; GFX10-NEXT:    s_mov_b32 s4, 8
3201; GFX10-NEXT:    s_waitcnt vmcnt(0)
3202; GFX10-NEXT:    v_mov_b32_e32 v0, 16
3203; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3204; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3205; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3206; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
3207; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
3208; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
3209; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3210; GFX10-NEXT:    s_setpc_b64 s[30:31]
3211  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3212  %element = extractelement <16 x i8> %vector, i32 5
3213  ret i8 %element
3214}
3215
; Constant-index extract of element 6 from a <16 x i8> loaded through an
; addrspace(1) pointer. On every tested target the expected code loads all
; four dwords, rebuilds dword 1 (v1) from its individual bytes, and then
; shifts it right by 16 bits so the requested byte ends up in the low bits.
; The check lines below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
3216define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) {
3217; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
3218; GFX9:       ; %bb.0:
3219; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3220; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3221; GFX9-NEXT:    s_waitcnt vmcnt(0)
3222; GFX9-NEXT:    v_mov_b32_e32 v2, 8
3223; GFX9-NEXT:    s_mov_b32 s4, 16
3224; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3225; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3226; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3227; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3228; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
3229; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3230; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
3231; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3232; GFX9-NEXT:    s_setpc_b64 s[30:31]
3233;
3234; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
3235; GFX8:       ; %bb.0:
3236; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3237; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3238; GFX8-NEXT:    s_waitcnt vmcnt(0)
3239; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3240; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3241; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3242; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3243; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3244; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3245; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3246; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3247; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3248; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3249; GFX8-NEXT:    s_setpc_b64 s[30:31]
3250;
3251; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
3252; GFX7:       ; %bb.0:
3253; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3254; GFX7-NEXT:    s_mov_b32 s6, 0
3255; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3256; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3257; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3258; GFX7-NEXT:    s_waitcnt vmcnt(0)
3259; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
3260; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
3261; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
3262; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
3263; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3264; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3265; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3266; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
3267; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3268; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3269; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3270; GFX7-NEXT:    s_setpc_b64 s[30:31]
3271;
3272; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
3273; GFX10:       ; %bb.0:
3274; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3275; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3276; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3277; GFX10-NEXT:    s_waitcnt vmcnt(0)
3278; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3279; GFX10-NEXT:    s_mov_b32 s4, 16
3280; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3281; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
3282; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3283; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
3284; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3285; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
3286; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3287; GFX10-NEXT:    s_setpc_b64 s[30:31]
3288  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3289  %element = extractelement <16 x i8> %vector, i32 6
3290  ret i8 %element
3291}
3292
; Constant-index extract of element 7 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 7 is byte 3 of the second loaded dword, so
; every checked subtarget is expected to rebuild v1 from its four bytes and
; then shift right by 24, leaving the result in v0.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3293define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) {
3294; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
3295; GFX9:       ; %bb.0:
3296; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3297; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3298; GFX9-NEXT:    s_waitcnt vmcnt(0)
3299; GFX9-NEXT:    v_mov_b32_e32 v2, 8
3300; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3301; GFX9-NEXT:    v_mov_b32_e32 v3, 16
3302; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
3303; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3304; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3305; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
3306; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3307; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
3308; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3309; GFX9-NEXT:    s_setpc_b64 s[30:31]
3310;
3311; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
3312; GFX8:       ; %bb.0:
3313; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3314; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3315; GFX8-NEXT:    s_waitcnt vmcnt(0)
3316; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3317; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3318; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3319; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3320; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3321; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3322; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3323; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3324; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3325; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3326; GFX8-NEXT:    s_setpc_b64 s[30:31]
3327;
3328; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
3329; GFX7:       ; %bb.0:
3330; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3331; GFX7-NEXT:    s_mov_b32 s6, 0
3332; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3333; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3334; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3335; GFX7-NEXT:    s_waitcnt vmcnt(0)
3336; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
3337; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
3338; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
3339; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
3340; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3341; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3342; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3343; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
3344; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3345; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3346; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3347; GFX7-NEXT:    s_setpc_b64 s[30:31]
3348;
3349; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
3350; GFX10:       ; %bb.0:
3351; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3352; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3353; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3354; GFX10-NEXT:    s_waitcnt vmcnt(0)
3355; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3356; GFX10-NEXT:    v_mov_b32_e32 v2, 16
3357; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3358; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
3359; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3360; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
3361; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3362; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
3363; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3364; GFX10-NEXT:    s_setpc_b64 s[30:31]
3365  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3366  %element = extractelement <16 x i8> %vector, i32 7
3367  ret i8 %element
3368}
3369
; Constant-index extract of element 8 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 8 is byte 0 of the third loaded dword, so
; every checked subtarget is expected to rebuild v2 from its four bytes into
; v0 and return without any trailing right shift.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3370define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) {
3371; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
3372; GFX9:       ; %bb.0:
3373; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3374; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3375; GFX9-NEXT:    s_waitcnt vmcnt(0)
3376; GFX9-NEXT:    v_mov_b32_e32 v1, 8
3377; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3378; GFX9-NEXT:    v_mov_b32_e32 v3, 16
3379; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
3380; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3381; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3382; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v1
3383; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3384; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
3385; GFX9-NEXT:    s_setpc_b64 s[30:31]
3386;
3387; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
3388; GFX8:       ; %bb.0:
3389; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3390; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3391; GFX8-NEXT:    s_waitcnt vmcnt(0)
3392; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3393; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3394; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3395; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3396; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3397; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3398; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3399; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3400; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3401; GFX8-NEXT:    s_setpc_b64 s[30:31]
3402;
3403; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
3404; GFX7:       ; %bb.0:
3405; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3406; GFX7-NEXT:    s_mov_b32 s6, 0
3407; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3408; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3409; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3410; GFX7-NEXT:    s_waitcnt vmcnt(0)
3411; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
3412; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
3413; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
3414; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
3415; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3416; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3417; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3418; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3419; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3420; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3421; GFX7-NEXT:    s_setpc_b64 s[30:31]
3422;
3423; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
3424; GFX10:       ; %bb.0:
3425; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3426; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3427; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3428; GFX10-NEXT:    s_waitcnt vmcnt(0)
3429; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3430; GFX10-NEXT:    v_mov_b32_e32 v1, 16
3431; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3432; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3433; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3434; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
3435; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
3436; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
3437; GFX10-NEXT:    s_setpc_b64 s[30:31]
3438  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3439  %element = extractelement <16 x i8> %vector, i32 8
3440  ret i8 %element
3441}
3442
; Constant-index extract of element 9 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 9 is byte 1 of the third loaded dword, so
; every checked subtarget is expected to rebuild v2 from its four bytes and
; then shift right by 8, leaving the result in v0.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3443define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) {
3444; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
3445; GFX9:       ; %bb.0:
3446; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3447; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3448; GFX9-NEXT:    s_mov_b32 s4, 8
3449; GFX9-NEXT:    s_waitcnt vmcnt(0)
3450; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3451; GFX9-NEXT:    v_mov_b32_e32 v1, 16
3452; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3453; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3454; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3455; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v4
3456; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
3457; GFX9-NEXT:    v_or3_b32 v0, v0, v1, v2
3458; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3459; GFX9-NEXT:    s_setpc_b64 s[30:31]
3460;
3461; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
3462; GFX8:       ; %bb.0:
3463; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3464; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3465; GFX8-NEXT:    s_waitcnt vmcnt(0)
3466; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3467; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3468; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3469; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3470; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3471; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3472; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3473; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3474; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3475; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3476; GFX8-NEXT:    s_setpc_b64 s[30:31]
3477;
3478; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
3479; GFX7:       ; %bb.0:
3480; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3481; GFX7-NEXT:    s_mov_b32 s6, 0
3482; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3483; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3484; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3485; GFX7-NEXT:    s_waitcnt vmcnt(0)
3486; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
3487; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
3488; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
3489; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
3490; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3491; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3492; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3493; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3494; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3495; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3496; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3497; GFX7-NEXT:    s_setpc_b64 s[30:31]
3498;
3499; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
3500; GFX10:       ; %bb.0:
3501; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3502; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3503; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3504; GFX10-NEXT:    s_mov_b32 s4, 8
3505; GFX10-NEXT:    s_waitcnt vmcnt(0)
3506; GFX10-NEXT:    v_mov_b32_e32 v0, 16
3507; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3508; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3509; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3510; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v2, v1
3511; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
3512; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
3513; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3514; GFX10-NEXT:    s_setpc_b64 s[30:31]
3515  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3516  %element = extractelement <16 x i8> %vector, i32 9
3517  ret i8 %element
3518}
3519
; Constant-index extract of element 10 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 10 is byte 2 of the third loaded dword, so
; every checked subtarget is expected to rebuild v2 from its four bytes and
; then shift right by 16, leaving the result in v0.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3520define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) {
3521; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
3522; GFX9:       ; %bb.0:
3523; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3524; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3525; GFX9-NEXT:    s_waitcnt vmcnt(0)
3526; GFX9-NEXT:    v_mov_b32_e32 v1, 8
3527; GFX9-NEXT:    s_mov_b32 s4, 16
3528; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3529; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3530; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3531; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3532; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v1
3533; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3534; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
3535; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3536; GFX9-NEXT:    s_setpc_b64 s[30:31]
3537;
3538; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
3539; GFX8:       ; %bb.0:
3540; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3541; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3542; GFX8-NEXT:    s_waitcnt vmcnt(0)
3543; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3544; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3545; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3546; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3547; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3548; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3549; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3550; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3551; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3552; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3553; GFX8-NEXT:    s_setpc_b64 s[30:31]
3554;
3555; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
3556; GFX7:       ; %bb.0:
3557; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3558; GFX7-NEXT:    s_mov_b32 s6, 0
3559; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3560; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3561; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3562; GFX7-NEXT:    s_waitcnt vmcnt(0)
3563; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
3564; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
3565; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
3566; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
3567; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3568; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3569; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3570; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3571; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3572; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3573; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3574; GFX7-NEXT:    s_setpc_b64 s[30:31]
3575;
3576; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
3577; GFX10:       ; %bb.0:
3578; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3579; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3580; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3581; GFX10-NEXT:    s_waitcnt vmcnt(0)
3582; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3583; GFX10-NEXT:    s_mov_b32 s4, 16
3584; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3585; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 24, v2
3586; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3587; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
3588; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3589; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
3590; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3591; GFX10-NEXT:    s_setpc_b64 s[30:31]
3592  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3593  %element = extractelement <16 x i8> %vector, i32 10
3594  ret i8 %element
3595}
3596
; Constant-index extract of element 11 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 11 is byte 3 of the third loaded dword, so
; every checked subtarget is expected to rebuild v2 from its four bytes and
; then shift right by 24, leaving the result in v0.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3597define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) {
3598; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
3599; GFX9:       ; %bb.0:
3600; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3601; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3602; GFX9-NEXT:    s_waitcnt vmcnt(0)
3603; GFX9-NEXT:    v_mov_b32_e32 v1, 8
3604; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3605; GFX9-NEXT:    v_mov_b32_e32 v3, 16
3606; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
3607; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3608; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3609; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v1
3610; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3611; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
3612; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3613; GFX9-NEXT:    s_setpc_b64 s[30:31]
3614;
3615; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
3616; GFX8:       ; %bb.0:
3617; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3618; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3619; GFX8-NEXT:    s_waitcnt vmcnt(0)
3620; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3621; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3622; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3623; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3624; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3625; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3626; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3627; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3628; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3629; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3630; GFX8-NEXT:    s_setpc_b64 s[30:31]
3631;
3632; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
3633; GFX7:       ; %bb.0:
3634; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3635; GFX7-NEXT:    s_mov_b32 s6, 0
3636; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3637; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3638; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3639; GFX7-NEXT:    s_waitcnt vmcnt(0)
3640; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
3641; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
3642; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
3643; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
3644; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3645; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3646; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3647; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3648; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3649; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3650; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3651; GFX7-NEXT:    s_setpc_b64 s[30:31]
3652;
3653; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
3654; GFX10:       ; %bb.0:
3655; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3656; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3657; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3658; GFX10-NEXT:    s_waitcnt vmcnt(0)
3659; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3660; GFX10-NEXT:    v_mov_b32_e32 v1, 16
3661; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3662; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
3663; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3664; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
3665; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
3666; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
3667; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3668; GFX10-NEXT:    s_setpc_b64 s[30:31]
3669  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3670  %element = extractelement <16 x i8> %vector, i32 11
3671  ret i8 %element
3672}
3673
; Constant-index extract of element 12 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 12 is byte 0 of the fourth loaded dword, so
; every checked subtarget is expected to rebuild v3 from its four bytes into
; v0 and return without any trailing right shift.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3674define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) {
3675; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
3676; GFX9:       ; %bb.0:
3677; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3678; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3679; GFX9-NEXT:    s_waitcnt vmcnt(0)
3680; GFX9-NEXT:    v_mov_b32_e32 v1, 8
3681; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3682; GFX9-NEXT:    v_mov_b32_e32 v2, 16
3683; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
3684; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3685; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3686; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v1
3687; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3688; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
3689; GFX9-NEXT:    s_setpc_b64 s[30:31]
3690;
3691; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
3692; GFX8:       ; %bb.0:
3693; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3694; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3695; GFX8-NEXT:    s_waitcnt vmcnt(0)
3696; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3697; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3698; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3699; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3700; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3701; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3702; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3703; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3704; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3705; GFX8-NEXT:    s_setpc_b64 s[30:31]
3706;
3707; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
3708; GFX7:       ; %bb.0:
3709; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3710; GFX7-NEXT:    s_mov_b32 s6, 0
3711; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3712; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3713; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3714; GFX7-NEXT:    s_waitcnt vmcnt(0)
3715; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
3716; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
3717; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
3718; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
3719; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
3720; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
3721; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3722; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3723; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3724; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3725; GFX7-NEXT:    s_setpc_b64 s[30:31]
3726;
3727; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
3728; GFX10:       ; %bb.0:
3729; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3730; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3731; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3732; GFX10-NEXT:    s_waitcnt vmcnt(0)
3733; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3734; GFX10-NEXT:    v_mov_b32_e32 v1, 16
3735; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3736; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3737; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3738; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
3739; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
3740; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
3741; GFX10-NEXT:    s_setpc_b64 s[30:31]
3742  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3743  %element = extractelement <16 x i8> %vector, i32 12
3744  ret i8 %element
3745}
3746
; Constant-index extract of element 13 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 13 is byte 1 of the fourth loaded dword, so
; every checked subtarget is expected to rebuild v3 from its four bytes and
; then shift right by 8, leaving the result in v0.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3747define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) {
3748; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
3749; GFX9:       ; %bb.0:
3750; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3751; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3752; GFX9-NEXT:    s_mov_b32 s4, 8
3753; GFX9-NEXT:    s_waitcnt vmcnt(0)
3754; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3755; GFX9-NEXT:    v_mov_b32_e32 v1, 16
3756; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3757; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3758; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3759; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v4
3760; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
3761; GFX9-NEXT:    v_or3_b32 v0, v0, v1, v2
3762; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3763; GFX9-NEXT:    s_setpc_b64 s[30:31]
3764;
3765; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
3766; GFX8:       ; %bb.0:
3767; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3768; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3769; GFX8-NEXT:    s_waitcnt vmcnt(0)
3770; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3771; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3772; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3773; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3774; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3775; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3776; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3777; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3778; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3779; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3780; GFX8-NEXT:    s_setpc_b64 s[30:31]
3781;
3782; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
3783; GFX7:       ; %bb.0:
3784; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3785; GFX7-NEXT:    s_mov_b32 s6, 0
3786; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3787; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3788; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3789; GFX7-NEXT:    s_waitcnt vmcnt(0)
3790; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
3791; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
3792; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
3793; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
3794; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
3795; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
3796; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3797; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3798; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3799; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3800; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3801; GFX7-NEXT:    s_setpc_b64 s[30:31]
3802;
3803; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
3804; GFX10:       ; %bb.0:
3805; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3806; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3807; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3808; GFX10-NEXT:    s_mov_b32 s4, 8
3809; GFX10-NEXT:    s_waitcnt vmcnt(0)
3810; GFX10-NEXT:    v_mov_b32_e32 v0, 16
3811; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3812; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3813; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3814; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v3, v1
3815; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
3816; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
3817; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3818; GFX10-NEXT:    s_setpc_b64 s[30:31]
3819  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3820  %element = extractelement <16 x i8> %vector, i32 13
3821  ret i8 %element
3822}
3823
; Constant-index extract of element 14 from a <16 x i8> loaded through an
; addrspace(1) pointer. Element 14 is byte 2 of the fourth loaded dword, so
; every checked subtarget is expected to rebuild v3 from its four bytes and
; then shift right by 16, leaving the result in v0.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3824define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) {
3825; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
3826; GFX9:       ; %bb.0:
3827; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3828; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3829; GFX9-NEXT:    s_waitcnt vmcnt(0)
3830; GFX9-NEXT:    v_mov_b32_e32 v1, 8
3831; GFX9-NEXT:    s_mov_b32 s4, 16
3832; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3833; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3834; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3835; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3836; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v1
3837; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3838; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
3839; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3840; GFX9-NEXT:    s_setpc_b64 s[30:31]
3841;
3842; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
3843; GFX8:       ; %bb.0:
3844; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3845; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3846; GFX8-NEXT:    s_waitcnt vmcnt(0)
3847; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3848; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3849; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3850; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3851; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3852; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3853; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3854; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3855; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3856; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3857; GFX8-NEXT:    s_setpc_b64 s[30:31]
3858;
3859; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
3860; GFX7:       ; %bb.0:
3861; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3862; GFX7-NEXT:    s_mov_b32 s6, 0
3863; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3864; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3865; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3866; GFX7-NEXT:    s_waitcnt vmcnt(0)
3867; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
3868; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
3869; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
3870; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
3871; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
3872; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
3873; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3874; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3875; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3876; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3877; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3878; GFX7-NEXT:    s_setpc_b64 s[30:31]
3879;
3880; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
3881; GFX10:       ; %bb.0:
3882; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3883; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3884; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3885; GFX10-NEXT:    s_waitcnt vmcnt(0)
3886; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3887; GFX10-NEXT:    s_mov_b32 s4, 16
3888; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3889; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
3890; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3891; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
3892; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3893; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
3894; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3895; GFX10-NEXT:    s_setpc_b64 s[30:31]
3896  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3897  %element = extractelement <16 x i8> %vector, i32 14
3898  ret i8 %element
3899}
3900
; Constant-index extract of element 15 (the last one) from a <16 x i8> loaded
; through an addrspace(1) pointer. Element 15 is byte 3 of the fourth loaded
; dword, so every checked subtarget is expected to rebuild v3 from its four
; bytes and then shift right by 24, leaving the result in v0.
; The assertion lines are autogenerated (see the note on the first line of
; this file); regenerate them with that script instead of editing by hand.
3901define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) {
3902; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
3903; GFX9:       ; %bb.0:
3904; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3905; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3906; GFX9-NEXT:    s_waitcnt vmcnt(0)
3907; GFX9-NEXT:    v_mov_b32_e32 v1, 8
3908; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3909; GFX9-NEXT:    v_mov_b32_e32 v2, 16
3910; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
3911; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3912; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3913; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v1
3914; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3915; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
3916; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3917; GFX9-NEXT:    s_setpc_b64 s[30:31]
3918;
3919; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
3920; GFX8:       ; %bb.0:
3921; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3922; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3923; GFX8-NEXT:    s_waitcnt vmcnt(0)
3924; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3925; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3926; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3927; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3928; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3929; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3930; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3931; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3932; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3933; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3934; GFX8-NEXT:    s_setpc_b64 s[30:31]
3935;
3936; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
3937; GFX7:       ; %bb.0:
3938; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3939; GFX7-NEXT:    s_mov_b32 s6, 0
3940; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3941; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3942; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3943; GFX7-NEXT:    s_waitcnt vmcnt(0)
3944; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
3945; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
3946; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
3947; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
3948; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
3949; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
3950; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
3951; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
3952; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3953; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
3954; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3955; GFX7-NEXT:    s_setpc_b64 s[30:31]
3956;
3957; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
3958; GFX10:       ; %bb.0:
3959; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3960; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3961; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3962; GFX10-NEXT:    s_waitcnt vmcnt(0)
3963; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3964; GFX10-NEXT:    v_mov_b32_e32 v1, 16
3965; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3966; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
3967; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3968; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
3969; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
3970; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
3971; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3972; GFX10-NEXT:    s_setpc_b64 s[30:31]
3973  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3974  %element = extractelement <16 x i8> %vector, i32 15
3975  ret i8 %element
3976}
3977