; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Dynamic extractelement from a constant <8 x float> (1.0 through 8.0) with a
; plain (non-inreg) i32 index. The checks expect the index in v0 and a chain of
; v_cmp_eq_u32 / v_cndmask_b32 pairs for indices 1 through 7; element 0 is the
; fall-through value of the first select.
define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_const_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT:    v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8f32_const_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a constant <8 x float> (1.0 through 8.0) in an
; amdgpu_ps function whose index is passed inreg. The checks expect the index
; in s2, a scalar s_cmp_eq_u32 / s_cselect_b32 chain for indices 1 through 7,
; and a final copy of the selected value into v0 for the return.
define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v8f32_const_s_s:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_cmp_eq_u32 s2, 1
; GCN-NEXT:    s_cselect_b32 s0, 2.0, 1.0
; GCN-NEXT:    s_cmp_eq_u32 s2, 2
; GCN-NEXT:    s_cselect_b32 s0, 0x40400000, s0
; GCN-NEXT:    s_cmp_eq_u32 s2, 3
; GCN-NEXT:    s_cselect_b32 s0, 4.0, s0
; GCN-NEXT:    s_cmp_eq_u32 s2, 4
; GCN-NEXT:    s_cselect_b32 s0, 0x40a00000, s0
; GCN-NEXT:    s_cmp_eq_u32 s2, 5
; GCN-NEXT:    s_cselect_b32 s0, 0x40c00000, s0
; GCN-NEXT:    s_cmp_eq_u32 s2, 6
; GCN-NEXT:    s_cselect_b32 s0, 0x40e00000, s0
; GCN-NEXT:    s_cmp_eq_u32 s2, 7
; GCN-NEXT:    s_cselect_b32 s0, 0x41000000, s0
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_const_s_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_cmp_eq_u32 s2, 1
; GFX10-NEXT:    s_cselect_b32 s0, 2.0, 1.0
; GFX10-NEXT:    s_cmp_eq_u32 s2, 2
; GFX10-NEXT:    s_cselect_b32 s0, 0x40400000, s0
; GFX10-NEXT:    s_cmp_eq_u32 s2, 3
; GFX10-NEXT:    s_cselect_b32 s0, 4.0, s0
; GFX10-NEXT:    s_cmp_eq_u32 s2, 4
; GFX10-NEXT:    s_cselect_b32 s0, 0x40a00000, s0
; GFX10-NEXT:    s_cmp_eq_u32 s2, 5
; GFX10-NEXT:    s_cselect_b32 s0, 0x40c00000, s0
; GFX10-NEXT:    s_cmp_eq_u32 s2, 6
; GFX10-NEXT:    s_cselect_b32 s0, 0x40e00000, s0
; GFX10-NEXT:    s_cmp_eq_u32 s2, 7
; GFX10-NEXT:    s_cselect_b32 s0, 0x41000000, s0
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; Dynamic extractelement from an inreg <8 x float> with a non-inreg index in an
; amdgpu_ps function. The checks expect the vector in s2-s9 and the index in
; v0, lowered to a v_cmp_eq_u32 / v_cndmask_b32 chain; the two check variants
; differ in how the scalar elements are fed to the selects (copied into VGPRs
; first, or used directly as scalar operands on the GFX10 prefix).
define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    v_mov_b32_e32 v2, s1
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    v_mov_b32_e32 v3, s2
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v4, s3
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    v_mov_b32_e32 v5, s6
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    v_mov_b32_e32 v6, s7
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    v_mov_b32_e32 v7, s8
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v8, s9
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v8, vcc
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s7, vcc_lo
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement where neither the <8 x float> vector nor the index is
; inreg. The checks expect the elements in v0-v7 and the index in v8, selected
; by a v_cmp_eq_u32 / v_cndmask_b32 chain that accumulates into v0.
define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_v_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8f32_v_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a non-inreg <8 x float> with an inreg index in an
; amdgpu_ps function. The checks expect the elements in v0-v7 and the index in
; s2; each step compares s2 against a constant with v_cmp_eq_u32_e64 and
; selects into v0 with v_cndmask_b32.
define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v8f32_v_s:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_v_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 7
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement where both the <8 x float> vector and the index are
; inreg arguments of an amdgpu_ps function. The checks expect the elements in
; s2-s9 and the index in s10, lowered to a scalar s_cmp_eq_u32 / s_cselect_b32
; chain whose result is copied into v0 for the return.
define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_s:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_cmp_eq_u32 s10, 1
; GCN-NEXT:    s_cselect_b32 s0, s3, s2
; GCN-NEXT:    s_cmp_eq_u32 s10, 2
; GCN-NEXT:    s_cselect_b32 s0, s4, s0
; GCN-NEXT:    s_cmp_eq_u32 s10, 3
; GCN-NEXT:    s_cselect_b32 s0, s5, s0
; GCN-NEXT:    s_cmp_eq_u32 s10, 4
; GCN-NEXT:    s_cselect_b32 s0, s6, s0
; GCN-NEXT:    s_cmp_eq_u32 s10, 5
; GCN-NEXT:    s_cselect_b32 s0, s7, s0
; GCN-NEXT:    s_cmp_eq_u32 s10, 6
; GCN-NEXT:    s_cselect_b32 s0, s8, s0
; GCN-NEXT:    s_cmp_eq_u32 s10, 7
; GCN-NEXT:    s_cselect_b32 s0, s9, s0
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: dyn_extract_v8f32_s_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_cmp_eq_u32 s10, 1
; GFX10-NEXT:    s_cselect_b32 s0, s3, s2
; GFX10-NEXT:    s_cmp_eq_u32 s10, 2
; GFX10-NEXT:    s_cselect_b32 s0, s4, s0
; GFX10-NEXT:    s_cmp_eq_u32 s10, 3
; GFX10-NEXT:    s_cselect_b32 s0, s5, s0
; GFX10-NEXT:    s_cmp_eq_u32 s10, 4
; GFX10-NEXT:    s_cselect_b32 s0, s6, s0
; GFX10-NEXT:    s_cmp_eq_u32 s10, 5
; GFX10-NEXT:    s_cselect_b32 s0, s7, s0
; GFX10-NEXT:    s_cmp_eq_u32 s10, 6
; GFX10-NEXT:    s_cselect_b32 s0, s8, s0
; GFX10-NEXT:    s_cmp_eq_u32 s10, 7
; GFX10-NEXT:    s_cselect_b32 s0, s9, s0
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Dynamic extractelement from a constant <8 x i64> (1 through 8) with a
; non-inreg index. The checks expect the constants materialized as 64-bit
; scalar pairs (s[4:5] through s[18:19]), the index in v0, and two
; v_cndmask_b32 per compare, one for each 32-bit half of the i64 result
; returned in v0/v1.
define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_const_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b64 s[4:5], 1
; GCN-NEXT:    s_mov_b64 s[6:7], 2
; GCN-NEXT:    v_mov_b32_e32 v1, s4
; GCN-NEXT:    v_mov_b32_e32 v2, s5
; GCN-NEXT:    v_mov_b32_e32 v3, s6
; GCN-NEXT:    v_mov_b32_e32 v4, s7
; GCN-NEXT:    s_mov_b64 s[8:9], 3
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v5, s8
; GCN-NEXT:    v_mov_b32_e32 v6, s9
; GCN-NEXT:    s_mov_b64 s[10:11], 4
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v7, s10
; GCN-NEXT:    v_mov_b32_e32 v8, s11
; GCN-NEXT:    s_mov_b64 s[12:13], 5
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    s_mov_b64 s[14:15], 6
; GCN-NEXT:    v_mov_b32_e32 v9, s12
; GCN-NEXT:    v_mov_b32_e32 v10, s13
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    s_mov_b64 s[16:17], 7
; GCN-NEXT:    v_mov_b32_e32 v11, s14
; GCN-NEXT:    v_mov_b32_e32 v12, s15
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    s_mov_b64 s[18:19], 8
; GCN-NEXT:    v_mov_b32_e32 v13, s16
; GCN-NEXT:    v_mov_b32_e32 v14, s17
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v15, s18
; GCN-NEXT:    v_mov_b32_e32 v16, s19
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b64 s[6:7], 2
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, s6
; GFX10-NEXT:    v_mov_b32_e32 v2, s7
; GFX10-NEXT:    s_mov_b64 s[4:5], 1
; GFX10-NEXT:    s_mov_b64 s[8:9], 3
; GFX10-NEXT:    s_mov_b64 s[10:11], 4
; GFX10-NEXT:    v_cndmask_b32_e32 v1, s4, v1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v2, s5, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    s_mov_b64 s[12:13], 5
; GFX10-NEXT:    s_mov_b64 s[14:15], 6
; GFX10-NEXT:    s_mov_b64 s[16:17], 7
; GFX10-NEXT:    s_mov_b64 s[18:19], 8
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s16, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s17, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s18, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s19, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  ret i64 %ext
}

; Dynamic extractelement from a constant <8 x i64> (1 through 8) with an inreg
; index in an amdgpu_ps function; the result is stored through an undef
; addrspace(1) pointer. The checks expect the index copied from s2 into m0 and
; a single s_movrels_b64 relative to s[4:5]. The three check variants differ
; only in the store instruction (global_store_dwordx2 vs flat_store_dwordx2).
define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
; GPRIDX-NEXT:    s_mov_b32 m0, s2
; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b64 s[4:5], 1
; GFX10-NEXT:    s_mov_b32 m0, s2
; GFX10-NEXT:    s_mov_b64 s[18:19], 8
; GFX10-NEXT:    s_mov_b64 s[16:17], 7
; GFX10-NEXT:    s_mov_b64 s[14:15], 6
; GFX10-NEXT:    s_mov_b64 s[12:13], 5
; GFX10-NEXT:    s_mov_b64 s[10:11], 4
; GFX10-NEXT:    s_mov_b64 s[8:9], 3
; GFX10-NEXT:    s_mov_b64 s[6:7], 2
; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; Dynamic extractelement from an inreg <8 x i64> with a non-inreg index in an
; amdgpu_ps function; the result is stored through an undef addrspace(1)
; pointer. The checks expect the vector in s2-s17 and the index in v0, lowered
; to a v_cmp_eq_u32 chain with two v_cndmask_b32 per compare (one per 32-bit
; half of the i64), followed by a 64-bit store.
define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v2, s1
; GPRIDX-NEXT:    v_mov_b32_e32 v3, s2
; GPRIDX-NEXT:    v_mov_b32_e32 v4, s3
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    v_mov_b32_e32 v5, s4
; GPRIDX-NEXT:    v_mov_b32_e32 v6, s5
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GPRIDX-NEXT:    s_mov_b32 s8, s10
; GPRIDX-NEXT:    s_mov_b32 s9, s11
; GPRIDX-NEXT:    v_mov_b32_e32 v7, s6
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GPRIDX-NEXT:    s_mov_b32 s10, s12
; GPRIDX-NEXT:    s_mov_b32 s11, s13
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s8
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s10
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s11
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    v_mov_b32_e32 v1, s0
; MOVREL-NEXT:    v_mov_b32_e32 v2, s1
; MOVREL-NEXT:    v_mov_b32_e32 v3, s2
; MOVREL-NEXT:    v_mov_b32_e32 v4, s3
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    v_mov_b32_e32 v5, s4
; MOVREL-NEXT:    v_mov_b32_e32 v6, s5
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; MOVREL-NEXT:    s_mov_b32 s8, s10
; MOVREL-NEXT:    s_mov_b32 s9, s11
; MOVREL-NEXT:    v_mov_b32_e32 v7, s6
; MOVREL-NEXT:    v_mov_b32_e32 v8, s7
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; MOVREL-NEXT:    s_mov_b32 s10, s12
; MOVREL-NEXT:    s_mov_b32 s11, s13
; MOVREL-NEXT:    v_mov_b32_e32 v9, s8
; MOVREL-NEXT:    v_mov_b32_e32 v10, s9
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; MOVREL-NEXT:    v_mov_b32_e32 v11, s10
; MOVREL-NEXT:    v_mov_b32_e32 v12, s11
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s19, s5
; GFX10-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-NEXT:    v_mov_b32_e32 v2, s19
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    s_mov_b32 s12, s14
; GFX10-NEXT:    s_mov_b32 s13, s15
; GFX10-NEXT:    s_mov_b32 s14, s16
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    s_mov_b32 s15, s17
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s15, vcc_lo
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; Dynamic extractelement where neither the <8 x i64> vector nor the index is
; inreg. The checks expect the elements in v0-v15 and the index in v16; each
; compare drives two v_cndmask_b32, accumulating the two 32-bit halves of the
; result into v0 and v1.
define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_v_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_v_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  ret i64 %ext
}

; Dynamic extractelement from a non-inreg <8 x i64> with an inreg index in an
; amdgpu_ps function; the result is stored through an undef addrspace(1)
; pointer. The checks expect the index in s2 shifted left by one (two 32-bit
; registers per i64 element) and then used for indexed register reads based at
; v0 and v1: via s_set_gpr_idx_on under the GPRIDX prefix, and via m0 with
; v_movrels_b32 under the other two prefixes.
define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v17, v1
; GPRIDX-NEXT:    s_set_gpr_idx_off
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
; MOVREL-NEXT:    v_movrels_b32_e32 v17, v1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_v_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
; GFX10-NEXT:    v_movrels_b32_e32 v16, v0
; GFX10-NEXT:    v_movrels_b32_e32 v17, v1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
; GFX10-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; Dynamic extractelement where both the <8 x i64> vector and the index are
; inreg arguments of an amdgpu_ps function; the result is stored through an
; undef addrspace(1) pointer. The checks expect the index copied from s18 into
; m0, the vector moved from s2-s17 down to s0-s15, and a single s_movrels_b64
; relative to s[0:1]. The check variants differ only in the store instruction.
define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 m0, s18
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s8, s10
; GPRIDX-NEXT:    s_mov_b32 s9, s11
; GPRIDX-NEXT:    s_mov_b32 s10, s12
; GPRIDX-NEXT:    s_mov_b32 s11, s13
; GPRIDX-NEXT:    s_mov_b32 s12, s14
; GPRIDX-NEXT:    s_mov_b32 s13, s15
; GPRIDX-NEXT:    s_mov_b32 s14, s16
; GPRIDX-NEXT:    s_mov_b32 s15, s17
; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 m0, s18
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s8, s10
; MOVREL-NEXT:    s_mov_b32 s9, s11
; MOVREL-NEXT:    s_mov_b32 s10, s12
; MOVREL-NEXT:    s_mov_b32 s11, s13
; MOVREL-NEXT:    s_mov_b32 s12, s14
; MOVREL-NEXT:    s_mov_b32 s13, s15
; MOVREL-NEXT:    s_mov_b32 s14, s16
; MOVREL-NEXT:    s_mov_b32 s15, s17
; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 m0, s18
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    s_mov_b32 s12, s14
; GFX10-NEXT:    s_mov_b32 s13, s15
; GFX10-NEXT:    s_mov_b32 s14, s16
; GFX10-NEXT:    s_mov_b32 s15, s17
; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, i64 addrspace(1)* undef
  ret void
}

; Extract a float from an SGPR <8 x float> at the SGPR index %sel + 3.
; The checks expect an explicit s_add_i32 on the index register (s10),
; then seven s_cmp_eq_u32 / s_cselect_b32 pairs that test the adjusted index
; against 1 through 7, and finally a copy of the selected value into v0.
; Both check prefixes used for this function expect the same sequence.
766define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
767; GCN-LABEL: dyn_extract_v8f32_s_s_offset3:
768; GCN:       ; %bb.0: ; %entry
769; GCN-NEXT:    s_add_i32 s10, s10, 3
770; GCN-NEXT:    s_cmp_eq_u32 s10, 1
771; GCN-NEXT:    s_cselect_b32 s0, s3, s2
772; GCN-NEXT:    s_cmp_eq_u32 s10, 2
773; GCN-NEXT:    s_cselect_b32 s0, s4, s0
774; GCN-NEXT:    s_cmp_eq_u32 s10, 3
775; GCN-NEXT:    s_cselect_b32 s0, s5, s0
776; GCN-NEXT:    s_cmp_eq_u32 s10, 4
777; GCN-NEXT:    s_cselect_b32 s0, s6, s0
778; GCN-NEXT:    s_cmp_eq_u32 s10, 5
779; GCN-NEXT:    s_cselect_b32 s0, s7, s0
780; GCN-NEXT:    s_cmp_eq_u32 s10, 6
781; GCN-NEXT:    s_cselect_b32 s0, s8, s0
782; GCN-NEXT:    s_cmp_eq_u32 s10, 7
783; GCN-NEXT:    s_cselect_b32 s0, s9, s0
784; GCN-NEXT:    v_mov_b32_e32 v0, s0
785; GCN-NEXT:    ; return to shader part epilog
786;
787; GFX10-LABEL: dyn_extract_v8f32_s_s_offset3:
788; GFX10:       ; %bb.0: ; %entry
789; GFX10-NEXT:    s_add_i32 s10, s10, 3
790; GFX10-NEXT:    s_cmp_eq_u32 s10, 1
791; GFX10-NEXT:    s_cselect_b32 s0, s3, s2
792; GFX10-NEXT:    s_cmp_eq_u32 s10, 2
793; GFX10-NEXT:    s_cselect_b32 s0, s4, s0
794; GFX10-NEXT:    s_cmp_eq_u32 s10, 3
795; GFX10-NEXT:    s_cselect_b32 s0, s5, s0
796; GFX10-NEXT:    s_cmp_eq_u32 s10, 4
797; GFX10-NEXT:    s_cselect_b32 s0, s6, s0
798; GFX10-NEXT:    s_cmp_eq_u32 s10, 5
799; GFX10-NEXT:    s_cselect_b32 s0, s7, s0
800; GFX10-NEXT:    s_cmp_eq_u32 s10, 6
801; GFX10-NEXT:    s_cselect_b32 s0, s8, s0
802; GFX10-NEXT:    s_cmp_eq_u32 s10, 7
803; GFX10-NEXT:    s_cselect_b32 s0, s9, s0
804; GFX10-NEXT:    v_mov_b32_e32 v0, s0
805; GFX10-NEXT:    ; return to shader part epilog
806entry:
807  %add = add i32 %sel, 3
808  %ext = extractelement <8 x float> %vec, i32 %add
809  ret float %ext
810}
811
; Extract a float from a VGPR <8 x float> (v0..v7) at the VGPR index
; %sel + 3 (index in v8).
; All prefixes expect an explicit add of 3 to v8 followed by seven
; v_cmp_eq_u32 / v_cndmask_b32 pairs that accumulate the result in v0.
; The expected add differs per prefix: GPRIDX has "v_add_u32_e32 v8, 3, v8",
; MOVREL has the form with an extra vcc operand, and GFX10 has
; v_add_nc_u32_e32. GFX10 also uses vcc_lo and an extra s_waitcnt_vscnt.
812define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
813; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
814; GPRIDX:       ; %bb.0: ; %entry
815; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816; GPRIDX-NEXT:    v_add_u32_e32 v8, 3, v8
817; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
818; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
819; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
820; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
821; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
822; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
823; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
824; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
825; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
826; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
827; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
828; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
829; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
830; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
831; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
832;
833; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
834; MOVREL:       ; %bb.0: ; %entry
835; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
836; MOVREL-NEXT:    v_add_u32_e32 v8, vcc, 3, v8
837; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
838; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
839; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
840; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
841; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
842; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
843; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
844; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
845; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
846; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
847; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
848; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
849; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
850; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
851; MOVREL-NEXT:    s_setpc_b64 s[30:31]
852;
853; GFX10-LABEL: dyn_extract_v8f32_v_v_offset3:
854; GFX10:       ; %bb.0: ; %entry
855; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
857; GFX10-NEXT:    v_add_nc_u32_e32 v8, 3, v8
858; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
859; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
860; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
861; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
862; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
863; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
864; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
865; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
866; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
867; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
868; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
869; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
870; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
871; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
872; GFX10-NEXT:    s_setpc_b64 s[30:31]
873entry:
874  %add = add i32 %sel, 3
875  %ext = extractelement <8 x float> %vec, i32 %add
876  ret float %ext
877}
878
; Extract a double from an SGPR <8 x double> at the SGPR index %sel + 1.
; No add instruction is expected in the output. m0 receives the unmodified
; index (s18), and the constant appears in the source operand of
; s_movrels_b64, which starts at s[2:3] rather than s[0:1].
; Both check prefixes used for this function expect the same sequence.
879define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
880; GCN-LABEL: dyn_extract_v8f64_s_s_offset1:
881; GCN:       ; %bb.0: ; %entry
882; GCN-NEXT:    s_mov_b32 s0, s2
883; GCN-NEXT:    s_mov_b32 s1, s3
884; GCN-NEXT:    s_mov_b32 s2, s4
885; GCN-NEXT:    s_mov_b32 s3, s5
886; GCN-NEXT:    s_mov_b32 m0, s18
887; GCN-NEXT:    s_mov_b32 s4, s6
888; GCN-NEXT:    s_mov_b32 s5, s7
889; GCN-NEXT:    s_mov_b32 s6, s8
890; GCN-NEXT:    s_mov_b32 s7, s9
891; GCN-NEXT:    s_mov_b32 s8, s10
892; GCN-NEXT:    s_mov_b32 s9, s11
893; GCN-NEXT:    s_mov_b32 s10, s12
894; GCN-NEXT:    s_mov_b32 s11, s13
895; GCN-NEXT:    s_mov_b32 s12, s14
896; GCN-NEXT:    s_mov_b32 s13, s15
897; GCN-NEXT:    s_mov_b32 s14, s16
898; GCN-NEXT:    s_mov_b32 s15, s17
899; GCN-NEXT:    s_movrels_b64 s[0:1], s[2:3]
900; GCN-NEXT:    ; return to shader part epilog
901;
902; GFX10-LABEL: dyn_extract_v8f64_s_s_offset1:
903; GFX10:       ; %bb.0: ; %entry
904; GFX10-NEXT:    s_mov_b32 s0, s2
905; GFX10-NEXT:    s_mov_b32 s1, s3
906; GFX10-NEXT:    s_mov_b32 s2, s4
907; GFX10-NEXT:    s_mov_b32 s3, s5
908; GFX10-NEXT:    s_mov_b32 m0, s18
909; GFX10-NEXT:    s_mov_b32 s4, s6
910; GFX10-NEXT:    s_mov_b32 s5, s7
911; GFX10-NEXT:    s_mov_b32 s6, s8
912; GFX10-NEXT:    s_mov_b32 s7, s9
913; GFX10-NEXT:    s_mov_b32 s8, s10
914; GFX10-NEXT:    s_mov_b32 s9, s11
915; GFX10-NEXT:    s_mov_b32 s10, s12
916; GFX10-NEXT:    s_mov_b32 s11, s13
917; GFX10-NEXT:    s_mov_b32 s12, s14
918; GFX10-NEXT:    s_mov_b32 s13, s15
919; GFX10-NEXT:    s_mov_b32 s14, s16
920; GFX10-NEXT:    s_mov_b32 s15, s17
921; GFX10-NEXT:    s_movrels_b64 s[0:1], s[2:3]
922; GFX10-NEXT:    ; return to shader part epilog
923entry:
924  %add = add i32 %sel, 1
925  %ext = extractelement <8 x double> %vec, i32 %add
926  ret double %ext
927}
928
; Extract a double from an SGPR <8 x double> at the SGPR index %sel + 2.
; No add instruction is expected in the output. m0 receives the unmodified
; index (s18), and the constant appears in the source operand of
; s_movrels_b64, which starts at s[4:5] rather than s[0:1].
; Both check prefixes used for this function expect the same sequence.
929define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
930; GCN-LABEL: dyn_extract_v8f64_s_s_offset2:
931; GCN:       ; %bb.0: ; %entry
932; GCN-NEXT:    s_mov_b32 s0, s2
933; GCN-NEXT:    s_mov_b32 s1, s3
934; GCN-NEXT:    s_mov_b32 s2, s4
935; GCN-NEXT:    s_mov_b32 s3, s5
936; GCN-NEXT:    s_mov_b32 s4, s6
937; GCN-NEXT:    s_mov_b32 s5, s7
938; GCN-NEXT:    s_mov_b32 m0, s18
939; GCN-NEXT:    s_mov_b32 s6, s8
940; GCN-NEXT:    s_mov_b32 s7, s9
941; GCN-NEXT:    s_mov_b32 s8, s10
942; GCN-NEXT:    s_mov_b32 s9, s11
943; GCN-NEXT:    s_mov_b32 s10, s12
944; GCN-NEXT:    s_mov_b32 s11, s13
945; GCN-NEXT:    s_mov_b32 s12, s14
946; GCN-NEXT:    s_mov_b32 s13, s15
947; GCN-NEXT:    s_mov_b32 s14, s16
948; GCN-NEXT:    s_mov_b32 s15, s17
949; GCN-NEXT:    s_movrels_b64 s[0:1], s[4:5]
950; GCN-NEXT:    ; return to shader part epilog
951;
952; GFX10-LABEL: dyn_extract_v8f64_s_s_offset2:
953; GFX10:       ; %bb.0: ; %entry
954; GFX10-NEXT:    s_mov_b32 s0, s2
955; GFX10-NEXT:    s_mov_b32 s1, s3
956; GFX10-NEXT:    s_mov_b32 s2, s4
957; GFX10-NEXT:    s_mov_b32 s3, s5
958; GFX10-NEXT:    s_mov_b32 s4, s6
959; GFX10-NEXT:    s_mov_b32 s5, s7
960; GFX10-NEXT:    s_mov_b32 m0, s18
961; GFX10-NEXT:    s_mov_b32 s6, s8
962; GFX10-NEXT:    s_mov_b32 s7, s9
963; GFX10-NEXT:    s_mov_b32 s8, s10
964; GFX10-NEXT:    s_mov_b32 s9, s11
965; GFX10-NEXT:    s_mov_b32 s10, s12
966; GFX10-NEXT:    s_mov_b32 s11, s13
967; GFX10-NEXT:    s_mov_b32 s12, s14
968; GFX10-NEXT:    s_mov_b32 s13, s15
969; GFX10-NEXT:    s_mov_b32 s14, s16
970; GFX10-NEXT:    s_mov_b32 s15, s17
971; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
972; GFX10-NEXT:    ; return to shader part epilog
973entry:
974  %add = add i32 %sel, 2
975  %ext = extractelement <8 x double> %vec, i32 %add
976  ret double %ext
977}
978
; Extract a double from an SGPR <8 x double> at the SGPR index %sel + 3.
; No add instruction is expected in the output. m0 receives the unmodified
; index (s18), and the constant appears in the source operand of
; s_movrels_b64, which starts at s[6:7] rather than s[0:1].
; Both check prefixes used for this function expect the same sequence.
979define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
980; GCN-LABEL: dyn_extract_v8f64_s_s_offset3:
981; GCN:       ; %bb.0: ; %entry
982; GCN-NEXT:    s_mov_b32 s0, s2
983; GCN-NEXT:    s_mov_b32 s1, s3
984; GCN-NEXT:    s_mov_b32 s2, s4
985; GCN-NEXT:    s_mov_b32 s3, s5
986; GCN-NEXT:    s_mov_b32 s4, s6
987; GCN-NEXT:    s_mov_b32 s5, s7
988; GCN-NEXT:    s_mov_b32 s6, s8
989; GCN-NEXT:    s_mov_b32 s7, s9
990; GCN-NEXT:    s_mov_b32 m0, s18
991; GCN-NEXT:    s_mov_b32 s8, s10
992; GCN-NEXT:    s_mov_b32 s9, s11
993; GCN-NEXT:    s_mov_b32 s10, s12
994; GCN-NEXT:    s_mov_b32 s11, s13
995; GCN-NEXT:    s_mov_b32 s12, s14
996; GCN-NEXT:    s_mov_b32 s13, s15
997; GCN-NEXT:    s_mov_b32 s14, s16
998; GCN-NEXT:    s_mov_b32 s15, s17
999; GCN-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1000; GCN-NEXT:    ; return to shader part epilog
1001;
1002; GFX10-LABEL: dyn_extract_v8f64_s_s_offset3:
1003; GFX10:       ; %bb.0: ; %entry
1004; GFX10-NEXT:    s_mov_b32 s0, s2
1005; GFX10-NEXT:    s_mov_b32 s1, s3
1006; GFX10-NEXT:    s_mov_b32 s2, s4
1007; GFX10-NEXT:    s_mov_b32 s3, s5
1008; GFX10-NEXT:    s_mov_b32 s4, s6
1009; GFX10-NEXT:    s_mov_b32 s5, s7
1010; GFX10-NEXT:    s_mov_b32 s6, s8
1011; GFX10-NEXT:    s_mov_b32 s7, s9
1012; GFX10-NEXT:    s_mov_b32 m0, s18
1013; GFX10-NEXT:    s_mov_b32 s8, s10
1014; GFX10-NEXT:    s_mov_b32 s9, s11
1015; GFX10-NEXT:    s_mov_b32 s10, s12
1016; GFX10-NEXT:    s_mov_b32 s11, s13
1017; GFX10-NEXT:    s_mov_b32 s12, s14
1018; GFX10-NEXT:    s_mov_b32 s13, s15
1019; GFX10-NEXT:    s_mov_b32 s14, s16
1020; GFX10-NEXT:    s_mov_b32 s15, s17
1021; GFX10-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1022; GFX10-NEXT:    ; return to shader part epilog
1023entry:
1024  %add = add i32 %sel, 3
1025  %ext = extractelement <8 x double> %vec, i32 %add
1026  ret double %ext
1027}
1028
; Extract a double from an SGPR <8 x double> at the SGPR index %sel + 4.
; No add instruction is expected in the output. m0 receives the unmodified
; index (s18), and the constant appears in the source operand of
; s_movrels_b64, which starts at s[8:9] rather than s[0:1].
; Both check prefixes used for this function expect the same sequence.
1029define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
1030; GCN-LABEL: dyn_extract_v8f64_s_s_offset4:
1031; GCN:       ; %bb.0: ; %entry
1032; GCN-NEXT:    s_mov_b32 s0, s2
1033; GCN-NEXT:    s_mov_b32 s1, s3
1034; GCN-NEXT:    s_mov_b32 s2, s4
1035; GCN-NEXT:    s_mov_b32 s3, s5
1036; GCN-NEXT:    s_mov_b32 s4, s6
1037; GCN-NEXT:    s_mov_b32 s5, s7
1038; GCN-NEXT:    s_mov_b32 s6, s8
1039; GCN-NEXT:    s_mov_b32 s7, s9
1040; GCN-NEXT:    s_mov_b32 s8, s10
1041; GCN-NEXT:    s_mov_b32 s9, s11
1042; GCN-NEXT:    s_mov_b32 m0, s18
1043; GCN-NEXT:    s_mov_b32 s10, s12
1044; GCN-NEXT:    s_mov_b32 s11, s13
1045; GCN-NEXT:    s_mov_b32 s12, s14
1046; GCN-NEXT:    s_mov_b32 s13, s15
1047; GCN-NEXT:    s_mov_b32 s14, s16
1048; GCN-NEXT:    s_mov_b32 s15, s17
1049; GCN-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1050; GCN-NEXT:    ; return to shader part epilog
1051;
1052; GFX10-LABEL: dyn_extract_v8f64_s_s_offset4:
1053; GFX10:       ; %bb.0: ; %entry
1054; GFX10-NEXT:    s_mov_b32 s0, s2
1055; GFX10-NEXT:    s_mov_b32 s1, s3
1056; GFX10-NEXT:    s_mov_b32 s2, s4
1057; GFX10-NEXT:    s_mov_b32 s3, s5
1058; GFX10-NEXT:    s_mov_b32 s4, s6
1059; GFX10-NEXT:    s_mov_b32 s5, s7
1060; GFX10-NEXT:    s_mov_b32 s6, s8
1061; GFX10-NEXT:    s_mov_b32 s7, s9
1062; GFX10-NEXT:    s_mov_b32 s8, s10
1063; GFX10-NEXT:    s_mov_b32 s9, s11
1064; GFX10-NEXT:    s_mov_b32 m0, s18
1065; GFX10-NEXT:    s_mov_b32 s10, s12
1066; GFX10-NEXT:    s_mov_b32 s11, s13
1067; GFX10-NEXT:    s_mov_b32 s12, s14
1068; GFX10-NEXT:    s_mov_b32 s13, s15
1069; GFX10-NEXT:    s_mov_b32 s14, s16
1070; GFX10-NEXT:    s_mov_b32 s15, s17
1071; GFX10-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1072; GFX10-NEXT:    ; return to shader part epilog
1073entry:
1074  %add = add i32 %sel, 4
1075  %ext = extractelement <8 x double> %vec, i32 %add
1076  ret double %ext
1077}
1078
; Extract a double from an SGPR <8 x double> at the SGPR index %sel + 5.
; No add instruction is expected in the output. m0 receives the unmodified
; index (s18), and the constant appears in the source operand of
; s_movrels_b64, which starts at s[10:11] rather than s[0:1].
; Both check prefixes used for this function expect the same sequence.
1079define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
1080; GCN-LABEL: dyn_extract_v8f64_s_s_offset5:
1081; GCN:       ; %bb.0: ; %entry
1082; GCN-NEXT:    s_mov_b32 s0, s2
1083; GCN-NEXT:    s_mov_b32 s1, s3
1084; GCN-NEXT:    s_mov_b32 s2, s4
1085; GCN-NEXT:    s_mov_b32 s3, s5
1086; GCN-NEXT:    s_mov_b32 s4, s6
1087; GCN-NEXT:    s_mov_b32 s5, s7
1088; GCN-NEXT:    s_mov_b32 s6, s8
1089; GCN-NEXT:    s_mov_b32 s7, s9
1090; GCN-NEXT:    s_mov_b32 s8, s10
1091; GCN-NEXT:    s_mov_b32 s9, s11
1092; GCN-NEXT:    s_mov_b32 s10, s12
1093; GCN-NEXT:    s_mov_b32 s11, s13
1094; GCN-NEXT:    s_mov_b32 m0, s18
1095; GCN-NEXT:    s_mov_b32 s12, s14
1096; GCN-NEXT:    s_mov_b32 s13, s15
1097; GCN-NEXT:    s_mov_b32 s14, s16
1098; GCN-NEXT:    s_mov_b32 s15, s17
1099; GCN-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1100; GCN-NEXT:    ; return to shader part epilog
1101;
1102; GFX10-LABEL: dyn_extract_v8f64_s_s_offset5:
1103; GFX10:       ; %bb.0: ; %entry
1104; GFX10-NEXT:    s_mov_b32 s0, s2
1105; GFX10-NEXT:    s_mov_b32 s1, s3
1106; GFX10-NEXT:    s_mov_b32 s2, s4
1107; GFX10-NEXT:    s_mov_b32 s3, s5
1108; GFX10-NEXT:    s_mov_b32 s4, s6
1109; GFX10-NEXT:    s_mov_b32 s5, s7
1110; GFX10-NEXT:    s_mov_b32 s6, s8
1111; GFX10-NEXT:    s_mov_b32 s7, s9
1112; GFX10-NEXT:    s_mov_b32 s8, s10
1113; GFX10-NEXT:    s_mov_b32 s9, s11
1114; GFX10-NEXT:    s_mov_b32 s10, s12
1115; GFX10-NEXT:    s_mov_b32 s11, s13
1116; GFX10-NEXT:    s_mov_b32 m0, s18
1117; GFX10-NEXT:    s_mov_b32 s12, s14
1118; GFX10-NEXT:    s_mov_b32 s13, s15
1119; GFX10-NEXT:    s_mov_b32 s14, s16
1120; GFX10-NEXT:    s_mov_b32 s15, s17
1121; GFX10-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1122; GFX10-NEXT:    ; return to shader part epilog
1123entry:
1124  %add = add i32 %sel, 5
1125  %ext = extractelement <8 x double> %vec, i32 %add
1126  ret double %ext
1127}
1128
; Extract a double from an SGPR <8 x double> at the SGPR index %sel + 6.
; No add instruction is expected in the output. m0 receives the unmodified
; index (s18), and the constant appears in the source operand of
; s_movrels_b64, which starts at s[12:13] rather than s[0:1].
; Both check prefixes used for this function expect the same sequence.
1129define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
1130; GCN-LABEL: dyn_extract_v8f64_s_s_offset6:
1131; GCN:       ; %bb.0: ; %entry
1132; GCN-NEXT:    s_mov_b32 s0, s2
1133; GCN-NEXT:    s_mov_b32 s1, s3
1134; GCN-NEXT:    s_mov_b32 s2, s4
1135; GCN-NEXT:    s_mov_b32 s3, s5
1136; GCN-NEXT:    s_mov_b32 s4, s6
1137; GCN-NEXT:    s_mov_b32 s5, s7
1138; GCN-NEXT:    s_mov_b32 s6, s8
1139; GCN-NEXT:    s_mov_b32 s7, s9
1140; GCN-NEXT:    s_mov_b32 s8, s10
1141; GCN-NEXT:    s_mov_b32 s9, s11
1142; GCN-NEXT:    s_mov_b32 s10, s12
1143; GCN-NEXT:    s_mov_b32 s11, s13
1144; GCN-NEXT:    s_mov_b32 s12, s14
1145; GCN-NEXT:    s_mov_b32 s13, s15
1146; GCN-NEXT:    s_mov_b32 m0, s18
1147; GCN-NEXT:    s_mov_b32 s14, s16
1148; GCN-NEXT:    s_mov_b32 s15, s17
1149; GCN-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1150; GCN-NEXT:    ; return to shader part epilog
1151;
1152; GFX10-LABEL: dyn_extract_v8f64_s_s_offset6:
1153; GFX10:       ; %bb.0: ; %entry
1154; GFX10-NEXT:    s_mov_b32 s0, s2
1155; GFX10-NEXT:    s_mov_b32 s1, s3
1156; GFX10-NEXT:    s_mov_b32 s2, s4
1157; GFX10-NEXT:    s_mov_b32 s3, s5
1158; GFX10-NEXT:    s_mov_b32 s4, s6
1159; GFX10-NEXT:    s_mov_b32 s5, s7
1160; GFX10-NEXT:    s_mov_b32 s6, s8
1161; GFX10-NEXT:    s_mov_b32 s7, s9
1162; GFX10-NEXT:    s_mov_b32 s8, s10
1163; GFX10-NEXT:    s_mov_b32 s9, s11
1164; GFX10-NEXT:    s_mov_b32 s10, s12
1165; GFX10-NEXT:    s_mov_b32 s11, s13
1166; GFX10-NEXT:    s_mov_b32 s12, s14
1167; GFX10-NEXT:    s_mov_b32 s13, s15
1168; GFX10-NEXT:    s_mov_b32 m0, s18
1169; GFX10-NEXT:    s_mov_b32 s14, s16
1170; GFX10-NEXT:    s_mov_b32 s15, s17
1171; GFX10-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1172; GFX10-NEXT:    ; return to shader part epilog
1173entry:
1174  %add = add i32 %sel, 6
1175  %ext = extractelement <8 x double> %vec, i32 %add
1176  ret double %ext
1177}
1178
; Extract a double from an SGPR <8 x double> at the SGPR index %sel + 7.
; No add instruction is expected in the output. m0 receives the unmodified
; index (s18), and the constant appears in the source operand of
; s_movrels_b64, which starts at s[14:15] rather than s[0:1].
; Here the write to m0 is the last copy, directly before s_movrels_b64.
; The GPRIDX checks additionally expect an "s_nop 0" between the two, which
; the MOVREL and GFX10 checks do not have.
1179define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
1180; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
1181; GPRIDX:       ; %bb.0: ; %entry
1182; GPRIDX-NEXT:    s_mov_b32 s0, s2
1183; GPRIDX-NEXT:    s_mov_b32 s1, s3
1184; GPRIDX-NEXT:    s_mov_b32 s2, s4
1185; GPRIDX-NEXT:    s_mov_b32 s3, s5
1186; GPRIDX-NEXT:    s_mov_b32 s4, s6
1187; GPRIDX-NEXT:    s_mov_b32 s5, s7
1188; GPRIDX-NEXT:    s_mov_b32 s6, s8
1189; GPRIDX-NEXT:    s_mov_b32 s7, s9
1190; GPRIDX-NEXT:    s_mov_b32 s8, s10
1191; GPRIDX-NEXT:    s_mov_b32 s9, s11
1192; GPRIDX-NEXT:    s_mov_b32 s10, s12
1193; GPRIDX-NEXT:    s_mov_b32 s11, s13
1194; GPRIDX-NEXT:    s_mov_b32 s12, s14
1195; GPRIDX-NEXT:    s_mov_b32 s13, s15
1196; GPRIDX-NEXT:    s_mov_b32 s14, s16
1197; GPRIDX-NEXT:    s_mov_b32 s15, s17
1198; GPRIDX-NEXT:    s_mov_b32 m0, s18
1199; GPRIDX-NEXT:    s_nop 0
1200; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1201; GPRIDX-NEXT:    ; return to shader part epilog
1202;
1203; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
1204; MOVREL:       ; %bb.0: ; %entry
1205; MOVREL-NEXT:    s_mov_b32 s0, s2
1206; MOVREL-NEXT:    s_mov_b32 s1, s3
1207; MOVREL-NEXT:    s_mov_b32 s2, s4
1208; MOVREL-NEXT:    s_mov_b32 s3, s5
1209; MOVREL-NEXT:    s_mov_b32 s4, s6
1210; MOVREL-NEXT:    s_mov_b32 s5, s7
1211; MOVREL-NEXT:    s_mov_b32 s6, s8
1212; MOVREL-NEXT:    s_mov_b32 s7, s9
1213; MOVREL-NEXT:    s_mov_b32 s8, s10
1214; MOVREL-NEXT:    s_mov_b32 s9, s11
1215; MOVREL-NEXT:    s_mov_b32 s10, s12
1216; MOVREL-NEXT:    s_mov_b32 s11, s13
1217; MOVREL-NEXT:    s_mov_b32 s12, s14
1218; MOVREL-NEXT:    s_mov_b32 s13, s15
1219; MOVREL-NEXT:    s_mov_b32 s14, s16
1220; MOVREL-NEXT:    s_mov_b32 s15, s17
1221; MOVREL-NEXT:    s_mov_b32 m0, s18
1222; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1223; MOVREL-NEXT:    ; return to shader part epilog
1224;
1225; GFX10-LABEL: dyn_extract_v8f64_s_s_offset7:
1226; GFX10:       ; %bb.0: ; %entry
1227; GFX10-NEXT:    s_mov_b32 s0, s2
1228; GFX10-NEXT:    s_mov_b32 s1, s3
1229; GFX10-NEXT:    s_mov_b32 s2, s4
1230; GFX10-NEXT:    s_mov_b32 s3, s5
1231; GFX10-NEXT:    s_mov_b32 s4, s6
1232; GFX10-NEXT:    s_mov_b32 s5, s7
1233; GFX10-NEXT:    s_mov_b32 s6, s8
1234; GFX10-NEXT:    s_mov_b32 s7, s9
1235; GFX10-NEXT:    s_mov_b32 s8, s10
1236; GFX10-NEXT:    s_mov_b32 s9, s11
1237; GFX10-NEXT:    s_mov_b32 s10, s12
1238; GFX10-NEXT:    s_mov_b32 s11, s13
1239; GFX10-NEXT:    s_mov_b32 s12, s14
1240; GFX10-NEXT:    s_mov_b32 s13, s15
1241; GFX10-NEXT:    s_mov_b32 s14, s16
1242; GFX10-NEXT:    s_mov_b32 s15, s17
1243; GFX10-NEXT:    s_mov_b32 m0, s18
1244; GFX10-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1245; GFX10-NEXT:    ; return to shader part epilog
1246entry:
1247  %add = add i32 %sel, 7
1248  %ext = extractelement <8 x double> %vec, i32 %add
1249  ret double %ext
1250}
1251
; Extract a double from an SGPR <8 x double> at the SGPR index %sel - 1
; (written in the IR as an add of -1).
; Unlike the positive-offset variants, the checks expect the constant to be
; applied to the index itself ("s_add_i32 m0, s18, -1"), and the
; s_movrels_b64 source operand stays at s[0:1].
; Both check prefixes used for this function expect the same sequence.
1252define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
1253; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1254; GCN:       ; %bb.0: ; %entry
1255; GCN-NEXT:    s_mov_b32 s0, s2
1256; GCN-NEXT:    s_mov_b32 s1, s3
1257; GCN-NEXT:    s_add_i32 m0, s18, -1
1258; GCN-NEXT:    s_mov_b32 s2, s4
1259; GCN-NEXT:    s_mov_b32 s3, s5
1260; GCN-NEXT:    s_mov_b32 s4, s6
1261; GCN-NEXT:    s_mov_b32 s5, s7
1262; GCN-NEXT:    s_mov_b32 s6, s8
1263; GCN-NEXT:    s_mov_b32 s7, s9
1264; GCN-NEXT:    s_mov_b32 s8, s10
1265; GCN-NEXT:    s_mov_b32 s9, s11
1266; GCN-NEXT:    s_mov_b32 s10, s12
1267; GCN-NEXT:    s_mov_b32 s11, s13
1268; GCN-NEXT:    s_mov_b32 s12, s14
1269; GCN-NEXT:    s_mov_b32 s13, s15
1270; GCN-NEXT:    s_mov_b32 s14, s16
1271; GCN-NEXT:    s_mov_b32 s15, s17
1272; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1273; GCN-NEXT:    ; return to shader part epilog
1274;
1275; GFX10-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1276; GFX10:       ; %bb.0: ; %entry
1277; GFX10-NEXT:    s_mov_b32 s0, s2
1278; GFX10-NEXT:    s_mov_b32 s1, s3
1279; GFX10-NEXT:    s_add_i32 m0, s18, -1
1280; GFX10-NEXT:    s_mov_b32 s2, s4
1281; GFX10-NEXT:    s_mov_b32 s3, s5
1282; GFX10-NEXT:    s_mov_b32 s4, s6
1283; GFX10-NEXT:    s_mov_b32 s5, s7
1284; GFX10-NEXT:    s_mov_b32 s6, s8
1285; GFX10-NEXT:    s_mov_b32 s7, s9
1286; GFX10-NEXT:    s_mov_b32 s8, s10
1287; GFX10-NEXT:    s_mov_b32 s9, s11
1288; GFX10-NEXT:    s_mov_b32 s10, s12
1289; GFX10-NEXT:    s_mov_b32 s11, s13
1290; GFX10-NEXT:    s_mov_b32 s12, s14
1291; GFX10-NEXT:    s_mov_b32 s13, s15
1292; GFX10-NEXT:    s_mov_b32 s14, s16
1293; GFX10-NEXT:    s_mov_b32 s15, s17
1294; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1295; GFX10-NEXT:    ; return to shader part epilog
1296entry:
1297  %add = add i32 %sel, -1
1298  %ext = extractelement <8 x double> %vec, i32 %add
1299  ret double %ext
1300}
1301
; Extract a double from a VGPR <8 x double> (v0..v15) at the VGPR index
; %sel + 3 (index in v16).
; All prefixes expect an explicit add of 3 to v16 followed by seven
; compares against 1 through 7. Each compare feeds two v_cndmask_b32, one
; updating v0 and one updating v1.
; The expected add differs per prefix: GPRIDX has "v_add_u32_e32 v16, 3, v16",
; MOVREL has the form with an extra vcc operand, and GFX10 has
; v_add_nc_u32_e32. GFX10 also uses vcc_lo and an extra s_waitcnt_vscnt.
1302define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
1303; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
1304; GPRIDX:       ; %bb.0: ; %entry
1305; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1306; GPRIDX-NEXT:    v_add_u32_e32 v16, 3, v16
1307; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1308; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1309; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1310; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1311; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1312; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1313; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1314; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1315; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1316; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1317; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1318; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1319; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1320; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1321; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1322; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1323; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1324; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1325; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1326; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1327; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1328; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
1329;
1330; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
1331; MOVREL:       ; %bb.0: ; %entry
1332; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1333; MOVREL-NEXT:    v_add_u32_e32 v16, vcc, 3, v16
1334; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1335; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1336; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1337; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1338; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1339; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1340; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1341; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1342; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1343; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1344; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1345; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1346; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1347; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1348; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1349; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1350; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1351; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1352; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1353; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1354; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1355; MOVREL-NEXT:    s_setpc_b64 s[30:31]
1356;
1357; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3:
1358; GFX10:       ; %bb.0: ; %entry
1359; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1361; GFX10-NEXT:    v_add_nc_u32_e32 v16, 3, v16
1362; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1363; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1364; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1365; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1366; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1367; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1368; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1369; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1370; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1371; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1372; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1373; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1374; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1375; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1376; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1377; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1378; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1379; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1380; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1381; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1382; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1383; GFX10-NEXT:    s_setpc_b64 s[30:31]
1384entry:
1385  %add = add i32 %sel, 3
1386  %ext = extractelement <8 x double> %vec, i32 %add
1387  ret double %ext
1388}
1389
; Extract an addrspace(3) pointer from a VGPR vector of eight such pointers
; using a VGPR index, with no constant offset.
; In the checks each pointer occupies one register (v0..v7) and the index is
; in v8. The expected code is seven v_cmp_eq_u32 / v_cndmask_b32 pairs that
; test the index against 1 through 7 and accumulate the result in v0.
; The GFX10 checks differ only in using vcc_lo and an extra s_waitcnt_vscnt.
1390define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) {
1391; GCN-LABEL: dyn_extract_v8p3_v_v:
1392; GCN:       ; %bb.0: ; %entry
1393; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
1395; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1396; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
1397; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1398; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
1399; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1400; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
1401; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1402; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
1403; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1404; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
1405; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1406; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
1407; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
1408; GCN-NEXT:    s_setpc_b64 s[30:31]
1409;
1410; GFX10-LABEL: dyn_extract_v8p3_v_v:
1411; GFX10:       ; %bb.0: ; %entry
1412; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1413; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1414; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
1415; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1416; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
1417; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1418; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
1419; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1420; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
1421; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1422; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
1423; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1424; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
1425; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1426; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
1427; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1428; GFX10-NEXT:    s_setpc_b64 s[30:31]
1429entry:
1430  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
1431  ret i8 addrspace(3)* %ext
1432}
1433
; Extract an addrspace(3) pointer from an SGPR (inreg) vector of eight such
; pointers using an SGPR index, then store it through an undef addrspace(3)
; pointer.
; All prefixes expect seven s_cmp_eq_u32 / s_cselect_b32 pairs that test the
; index (s10) against 1 through 7, a copy of the result into v0, and a
; ds_write_b32. The MOVREL checks additionally expect "s_mov_b32 m0, -1"
; before the ds_write_b32.
1434define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
1435; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
1436; GPRIDX:       ; %bb.0: ; %entry
1437; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
1438; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
1439; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
1440; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
1441; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
1442; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
1443; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
1444; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
1445; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
1446; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
1447; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
1448; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
1449; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
1450; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
1451; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1452; GPRIDX-NEXT:    ds_write_b32 v0, v0
1453; GPRIDX-NEXT:    s_endpgm
1454;
1455; MOVREL-LABEL: dyn_extract_v8p3_s_s:
1456; MOVREL:       ; %bb.0: ; %entry
1457; MOVREL-NEXT:    s_cmp_eq_u32 s10, 1
1458; MOVREL-NEXT:    s_cselect_b32 s0, s3, s2
1459; MOVREL-NEXT:    s_cmp_eq_u32 s10, 2
1460; MOVREL-NEXT:    s_cselect_b32 s0, s4, s0
1461; MOVREL-NEXT:    s_cmp_eq_u32 s10, 3
1462; MOVREL-NEXT:    s_cselect_b32 s0, s5, s0
1463; MOVREL-NEXT:    s_cmp_eq_u32 s10, 4
1464; MOVREL-NEXT:    s_cselect_b32 s0, s6, s0
1465; MOVREL-NEXT:    s_cmp_eq_u32 s10, 5
1466; MOVREL-NEXT:    s_cselect_b32 s0, s7, s0
1467; MOVREL-NEXT:    s_cmp_eq_u32 s10, 6
1468; MOVREL-NEXT:    s_cselect_b32 s0, s8, s0
1469; MOVREL-NEXT:    s_cmp_eq_u32 s10, 7
1470; MOVREL-NEXT:    s_cselect_b32 s0, s9, s0
1471; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1472; MOVREL-NEXT:    s_mov_b32 m0, -1
1473; MOVREL-NEXT:    ds_write_b32 v0, v0
1474; MOVREL-NEXT:    s_endpgm
1475;
1476; GFX10-LABEL: dyn_extract_v8p3_s_s:
1477; GFX10:       ; %bb.0: ; %entry
1478; GFX10-NEXT:    s_cmp_eq_u32 s10, 1
1479; GFX10-NEXT:    s_cselect_b32 s0, s3, s2
1480; GFX10-NEXT:    s_cmp_eq_u32 s10, 2
1481; GFX10-NEXT:    s_cselect_b32 s0, s4, s0
1482; GFX10-NEXT:    s_cmp_eq_u32 s10, 3
1483; GFX10-NEXT:    s_cselect_b32 s0, s5, s0
1484; GFX10-NEXT:    s_cmp_eq_u32 s10, 4
1485; GFX10-NEXT:    s_cselect_b32 s0, s6, s0
1486; GFX10-NEXT:    s_cmp_eq_u32 s10, 5
1487; GFX10-NEXT:    s_cselect_b32 s0, s7, s0
1488; GFX10-NEXT:    s_cmp_eq_u32 s10, 6
1489; GFX10-NEXT:    s_cselect_b32 s0, s8, s0
1490; GFX10-NEXT:    s_cmp_eq_u32 s10, 7
1491; GFX10-NEXT:    s_cselect_b32 s0, s9, s0
1492; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1493; GFX10-NEXT:    ds_write_b32 v0, v0
1494; GFX10-NEXT:    s_endpgm
1495entry:
1496  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
1497  store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
1498  ret void
1499}
1500
; Returns one element of an <8 x i8 addrspace(1)*> argument selected by a
; runtime i32 index; neither argument is marked `inreg`.
; All prefixes expect the index (v16) to be compared against 1..7, with each
; compare feeding two v_cndmask_b32 that update v0 and v1 (presumably the low
; and high halves of the selected pointer -- confirm against the ABI).
1501define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
1502; GCN-LABEL: dyn_extract_v8p1_v_v:
1503; GCN:       ; %bb.0: ; %entry
1504; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1505; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1506; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1507; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1508; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1509; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1510; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1511; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1512; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1513; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1514; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1515; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1516; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1517; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1518; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1519; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1520; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1521; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1522; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1523; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1524; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1525; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1526; GCN-NEXT:    s_setpc_b64 s[30:31]
1527;
1528; GFX10-LABEL: dyn_extract_v8p1_v_v:
1529; GFX10:       ; %bb.0: ; %entry
1530; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1531; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1532; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1533; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1534; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1535; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1536; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1537; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1538; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1539; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1540; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1541; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1542; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1543; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1544; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1545; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1546; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1547; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1548; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1549; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1550; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1551; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1552; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1553; GFX10-NEXT:    s_setpc_b64 s[30:31]
1554entry:
1555  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
1556  ret i8 addrspace(1)* %ext
1557}
1558
; amdgpu_ps variant: extracts one element of an <8 x i8 addrspace(1)*> where
; both the vector and the i32 index are passed `inreg`, then stores the result
; through an undef addrspace(1) pointer.
; All prefixes expect s2..s17 to be copied down to s0..s15, m0 to be loaded
; from s18 (the index), and the element to be picked with s_movrels_b64.
; The final store is global_store_dwordx2 for GPRIDX/GFX10 and
; flat_store_dwordx2 for MOVREL.
1559define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
1560; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
1561; GPRIDX:       ; %bb.0: ; %entry
1562; GPRIDX-NEXT:    s_mov_b32 s0, s2
1563; GPRIDX-NEXT:    s_mov_b32 s1, s3
1564; GPRIDX-NEXT:    s_mov_b32 m0, s18
1565; GPRIDX-NEXT:    s_mov_b32 s2, s4
1566; GPRIDX-NEXT:    s_mov_b32 s3, s5
1567; GPRIDX-NEXT:    s_mov_b32 s4, s6
1568; GPRIDX-NEXT:    s_mov_b32 s5, s7
1569; GPRIDX-NEXT:    s_mov_b32 s6, s8
1570; GPRIDX-NEXT:    s_mov_b32 s7, s9
1571; GPRIDX-NEXT:    s_mov_b32 s8, s10
1572; GPRIDX-NEXT:    s_mov_b32 s9, s11
1573; GPRIDX-NEXT:    s_mov_b32 s10, s12
1574; GPRIDX-NEXT:    s_mov_b32 s11, s13
1575; GPRIDX-NEXT:    s_mov_b32 s12, s14
1576; GPRIDX-NEXT:    s_mov_b32 s13, s15
1577; GPRIDX-NEXT:    s_mov_b32 s14, s16
1578; GPRIDX-NEXT:    s_mov_b32 s15, s17
1579; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1580; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1581; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
1582; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1583; GPRIDX-NEXT:    s_endpgm
1584;
1585; MOVREL-LABEL: dyn_extract_v8p1_s_s:
1586; MOVREL:       ; %bb.0: ; %entry
1587; MOVREL-NEXT:    s_mov_b32 s0, s2
1588; MOVREL-NEXT:    s_mov_b32 s1, s3
1589; MOVREL-NEXT:    s_mov_b32 m0, s18
1590; MOVREL-NEXT:    s_mov_b32 s2, s4
1591; MOVREL-NEXT:    s_mov_b32 s3, s5
1592; MOVREL-NEXT:    s_mov_b32 s4, s6
1593; MOVREL-NEXT:    s_mov_b32 s5, s7
1594; MOVREL-NEXT:    s_mov_b32 s6, s8
1595; MOVREL-NEXT:    s_mov_b32 s7, s9
1596; MOVREL-NEXT:    s_mov_b32 s8, s10
1597; MOVREL-NEXT:    s_mov_b32 s9, s11
1598; MOVREL-NEXT:    s_mov_b32 s10, s12
1599; MOVREL-NEXT:    s_mov_b32 s11, s13
1600; MOVREL-NEXT:    s_mov_b32 s12, s14
1601; MOVREL-NEXT:    s_mov_b32 s13, s15
1602; MOVREL-NEXT:    s_mov_b32 s14, s16
1603; MOVREL-NEXT:    s_mov_b32 s15, s17
1604; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1605; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1606; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
1607; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1608; MOVREL-NEXT:    s_endpgm
1609;
1610; GFX10-LABEL: dyn_extract_v8p1_s_s:
1611; GFX10:       ; %bb.0: ; %entry
1612; GFX10-NEXT:    s_mov_b32 s0, s2
1613; GFX10-NEXT:    s_mov_b32 s1, s3
1614; GFX10-NEXT:    s_mov_b32 m0, s18
1615; GFX10-NEXT:    s_mov_b32 s2, s4
1616; GFX10-NEXT:    s_mov_b32 s3, s5
1617; GFX10-NEXT:    s_mov_b32 s4, s6
1618; GFX10-NEXT:    s_mov_b32 s5, s7
1619; GFX10-NEXT:    s_mov_b32 s6, s8
1620; GFX10-NEXT:    s_mov_b32 s7, s9
1621; GFX10-NEXT:    s_mov_b32 s8, s10
1622; GFX10-NEXT:    s_mov_b32 s9, s11
1623; GFX10-NEXT:    s_mov_b32 s10, s12
1624; GFX10-NEXT:    s_mov_b32 s11, s13
1625; GFX10-NEXT:    s_mov_b32 s12, s14
1626; GFX10-NEXT:    s_mov_b32 s13, s15
1627; GFX10-NEXT:    s_mov_b32 s14, s16
1628; GFX10-NEXT:    s_mov_b32 s15, s17
1629; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1630; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1631; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1632; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1633; GFX10-NEXT:    s_endpgm
1634entry:
1635  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
1636  store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
1637  ret void
1638}
1639
; amdgpu_ps variant: returns one element of a <16 x float> argument selected
; by an `inreg` i32 index (the vector itself is not `inreg`).
; GPRIDX expects the index (s2) to drive s_set_gpr_idx_on/off around a single
; v_mov_b32; MOVREL and GFX10 expect m0 = s2 followed by v_movrels_b32.
1640define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
1641; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
1642; GPRIDX:       ; %bb.0: ; %entry
1643; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1644; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1645; GPRIDX-NEXT:    s_set_gpr_idx_off
1646; GPRIDX-NEXT:    ; return to shader part epilog
1647;
1648; MOVREL-LABEL: dyn_extract_v16f32_v_s:
1649; MOVREL:       ; %bb.0: ; %entry
1650; MOVREL-NEXT:    s_mov_b32 m0, s2
1651; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1652; MOVREL-NEXT:    ; return to shader part epilog
1653;
1654; GFX10-LABEL: dyn_extract_v16f32_v_s:
1655; GFX10:       ; %bb.0: ; %entry
1656; GFX10-NEXT:    s_mov_b32 m0, s2
1657; GFX10-NEXT:    v_movrels_b32_e32 v0, v0
1658; GFX10-NEXT:    ; return to shader part epilog
1659entry:
1660  %ext = extractelement <16 x float> %vec, i32 %sel
1661  ret float %ext
1662}
1663
; amdgpu_ps variant: returns one element of a <32 x float> argument selected
; by an `inreg` i32 index (the vector itself is not `inreg`).
; The expected sequences match the <16 x float> case: GPRIDX brackets a
; v_mov_b32 with s_set_gpr_idx_on s2 / s_set_gpr_idx_off, while MOVREL and
; GFX10 load m0 from s2 and use v_movrels_b32.
1664define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
1665; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
1666; GPRIDX:       ; %bb.0: ; %entry
1667; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1668; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1669; GPRIDX-NEXT:    s_set_gpr_idx_off
1670; GPRIDX-NEXT:    ; return to shader part epilog
1671;
1672; MOVREL-LABEL: dyn_extract_v32f32_v_s:
1673; MOVREL:       ; %bb.0: ; %entry
1674; MOVREL-NEXT:    s_mov_b32 m0, s2
1675; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1676; MOVREL-NEXT:    ; return to shader part epilog
1677;
1678; GFX10-LABEL: dyn_extract_v32f32_v_s:
1679; GFX10:       ; %bb.0: ; %entry
1680; GFX10-NEXT:    s_mov_b32 m0, s2
1681; GFX10-NEXT:    v_movrels_b32_e32 v0, v0
1682; GFX10-NEXT:    ; return to shader part epilog
1683entry:
1684  %ext = extractelement <32 x float> %vec, i32 %sel
1685  ret float %ext
1686}
1687
; amdgpu_ps variant: returns one element of a <16 x double> argument selected
; by an `inreg` i32 index (the vector itself is not `inreg`).
; Every prefix expects the index (s2) to be shifted left by 1 before use and
; two indexed 32-bit moves (based at v0 and v1), after which
; v_readfirstlane_b32 copies the two results into s0 and s1.
; GPRIDX performs the indexed moves between s_set_gpr_idx_on/off; MOVREL and
; GFX10 put the shifted index in m0 and use v_movrels_b32.
1688define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
1689; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
1690; GPRIDX:       ; %bb.0: ; %entry
1691; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
1692; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
1693; GPRIDX-NEXT:    v_mov_b32_e32 v32, v0
1694; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
1695; GPRIDX-NEXT:    s_set_gpr_idx_off
1696; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v32
1697; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
1698; GPRIDX-NEXT:    ; return to shader part epilog
1699;
1700; MOVREL-LABEL: dyn_extract_v16f64_v_s:
1701; MOVREL:       ; %bb.0: ; %entry
1702; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
1703; MOVREL-NEXT:    v_movrels_b32_e32 v32, v0
1704; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
1705; MOVREL-NEXT:    v_readfirstlane_b32 s0, v32
1706; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
1707; MOVREL-NEXT:    ; return to shader part epilog
1708;
1709; GFX10-LABEL: dyn_extract_v16f64_v_s:
1710; GFX10:       ; %bb.0: ; %entry
1711; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
1712; GFX10-NEXT:    v_movrels_b32_e32 v32, v0
1713; GFX10-NEXT:    v_movrels_b32_e32 v0, v1
1714; GFX10-NEXT:    v_readfirstlane_b32 s0, v32
1715; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
1716; GFX10-NEXT:    ; return to shader part epilog
1717entry:
1718  %ext = extractelement <16 x double> %vec, i32 %sel
1719  ret double %ext
1720}
1721
; amdgpu_ps variant: extracts from the constant vector <1.0, 2.0, ..., 16.0>
; using an `inreg` i32 index.
; GCN and GFX10 expect the same sequence: the 16 constants are materialized
; into s4..s19, m0 is loaded from the index (s2), s_movrels_b32 reads relative
; to s4, and the result is copied to v0 for the return.
1722define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
1723; GCN-LABEL: dyn_extract_v16f32_s_s:
1724; GCN:       ; %bb.0: ; %entry
1725; GCN-NEXT:    s_mov_b32 s4, 1.0
1726; GCN-NEXT:    s_mov_b32 m0, s2
1727; GCN-NEXT:    s_mov_b32 s19, 0x41800000
1728; GCN-NEXT:    s_mov_b32 s18, 0x41700000
1729; GCN-NEXT:    s_mov_b32 s17, 0x41600000
1730; GCN-NEXT:    s_mov_b32 s16, 0x41500000
1731; GCN-NEXT:    s_mov_b32 s15, 0x41400000
1732; GCN-NEXT:    s_mov_b32 s14, 0x41300000
1733; GCN-NEXT:    s_mov_b32 s13, 0x41200000
1734; GCN-NEXT:    s_mov_b32 s12, 0x41100000
1735; GCN-NEXT:    s_mov_b32 s11, 0x41000000
1736; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
1737; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
1738; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
1739; GCN-NEXT:    s_mov_b32 s7, 4.0
1740; GCN-NEXT:    s_mov_b32 s6, 0x40400000
1741; GCN-NEXT:    s_mov_b32 s5, 2.0
1742; GCN-NEXT:    s_movrels_b32 s0, s4
1743; GCN-NEXT:    v_mov_b32_e32 v0, s0
1744; GCN-NEXT:    ; return to shader part epilog
1745;
1746; GFX10-LABEL: dyn_extract_v16f32_s_s:
1747; GFX10:       ; %bb.0: ; %entry
1748; GFX10-NEXT:    s_mov_b32 s4, 1.0
1749; GFX10-NEXT:    s_mov_b32 m0, s2
1750; GFX10-NEXT:    s_mov_b32 s19, 0x41800000
1751; GFX10-NEXT:    s_mov_b32 s18, 0x41700000
1752; GFX10-NEXT:    s_mov_b32 s17, 0x41600000
1753; GFX10-NEXT:    s_mov_b32 s16, 0x41500000
1754; GFX10-NEXT:    s_mov_b32 s15, 0x41400000
1755; GFX10-NEXT:    s_mov_b32 s14, 0x41300000
1756; GFX10-NEXT:    s_mov_b32 s13, 0x41200000
1757; GFX10-NEXT:    s_mov_b32 s12, 0x41100000
1758; GFX10-NEXT:    s_mov_b32 s11, 0x41000000
1759; GFX10-NEXT:    s_mov_b32 s10, 0x40e00000
1760; GFX10-NEXT:    s_mov_b32 s9, 0x40c00000
1761; GFX10-NEXT:    s_mov_b32 s8, 0x40a00000
1762; GFX10-NEXT:    s_mov_b32 s7, 4.0
1763; GFX10-NEXT:    s_mov_b32 s6, 0x40400000
1764; GFX10-NEXT:    s_mov_b32 s5, 2.0
1765; GFX10-NEXT:    s_movrels_b32 s0, s4
1766; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1767; GFX10-NEXT:    ; return to shader part epilog
1768entry:
1769  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
1770  ret float %ext
1771}
1772
; amdgpu_ps variant: extracts from the constant vector <1.0, 2.0, ..., 32.0>
; using an `inreg` i32 index.
; GCN and GFX10 expect the same sequence: the 32 constants are materialized
; into s36..s67, m0 is loaded from the index (s2), s_movrels_b32 reads
; relative to s36, and the result is copied to v0 for the return.
1773define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
1774; GCN-LABEL: dyn_extract_v32f32_s_s:
1775; GCN:       ; %bb.0: ; %entry
1776; GCN-NEXT:    s_mov_b32 s36, 1.0
1777; GCN-NEXT:    s_mov_b32 m0, s2
1778; GCN-NEXT:    s_mov_b32 s67, 0x42000000
1779; GCN-NEXT:    s_mov_b32 s66, 0x41f80000
1780; GCN-NEXT:    s_mov_b32 s65, 0x41f00000
1781; GCN-NEXT:    s_mov_b32 s64, 0x41e80000
1782; GCN-NEXT:    s_mov_b32 s63, 0x41e00000
1783; GCN-NEXT:    s_mov_b32 s62, 0x41d80000
1784; GCN-NEXT:    s_mov_b32 s61, 0x41d00000
1785; GCN-NEXT:    s_mov_b32 s60, 0x41c80000
1786; GCN-NEXT:    s_mov_b32 s59, 0x41c00000
1787; GCN-NEXT:    s_mov_b32 s58, 0x41b80000
1788; GCN-NEXT:    s_mov_b32 s57, 0x41b00000
1789; GCN-NEXT:    s_mov_b32 s56, 0x41a80000
1790; GCN-NEXT:    s_mov_b32 s55, 0x41a00000
1791; GCN-NEXT:    s_mov_b32 s54, 0x41980000
1792; GCN-NEXT:    s_mov_b32 s53, 0x41900000
1793; GCN-NEXT:    s_mov_b32 s52, 0x41880000
1794; GCN-NEXT:    s_mov_b32 s51, 0x41800000
1795; GCN-NEXT:    s_mov_b32 s50, 0x41700000
1796; GCN-NEXT:    s_mov_b32 s49, 0x41600000
1797; GCN-NEXT:    s_mov_b32 s48, 0x41500000
1798; GCN-NEXT:    s_mov_b32 s47, 0x41400000
1799; GCN-NEXT:    s_mov_b32 s46, 0x41300000
1800; GCN-NEXT:    s_mov_b32 s45, 0x41200000
1801; GCN-NEXT:    s_mov_b32 s44, 0x41100000
1802; GCN-NEXT:    s_mov_b32 s43, 0x41000000
1803; GCN-NEXT:    s_mov_b32 s42, 0x40e00000
1804; GCN-NEXT:    s_mov_b32 s41, 0x40c00000
1805; GCN-NEXT:    s_mov_b32 s40, 0x40a00000
1806; GCN-NEXT:    s_mov_b32 s39, 4.0
1807; GCN-NEXT:    s_mov_b32 s38, 0x40400000
1808; GCN-NEXT:    s_mov_b32 s37, 2.0
1809; GCN-NEXT:    s_movrels_b32 s0, s36
1810; GCN-NEXT:    v_mov_b32_e32 v0, s0
1811; GCN-NEXT:    ; return to shader part epilog
1812;
1813; GFX10-LABEL: dyn_extract_v32f32_s_s:
1814; GFX10:       ; %bb.0: ; %entry
1815; GFX10-NEXT:    s_mov_b32 s36, 1.0
1816; GFX10-NEXT:    s_mov_b32 m0, s2
1817; GFX10-NEXT:    s_mov_b32 s67, 0x42000000
1818; GFX10-NEXT:    s_mov_b32 s66, 0x41f80000
1819; GFX10-NEXT:    s_mov_b32 s65, 0x41f00000
1820; GFX10-NEXT:    s_mov_b32 s64, 0x41e80000
1821; GFX10-NEXT:    s_mov_b32 s63, 0x41e00000
1822; GFX10-NEXT:    s_mov_b32 s62, 0x41d80000
1823; GFX10-NEXT:    s_mov_b32 s61, 0x41d00000
1824; GFX10-NEXT:    s_mov_b32 s60, 0x41c80000
1825; GFX10-NEXT:    s_mov_b32 s59, 0x41c00000
1826; GFX10-NEXT:    s_mov_b32 s58, 0x41b80000
1827; GFX10-NEXT:    s_mov_b32 s57, 0x41b00000
1828; GFX10-NEXT:    s_mov_b32 s56, 0x41a80000
1829; GFX10-NEXT:    s_mov_b32 s55, 0x41a00000
1830; GFX10-NEXT:    s_mov_b32 s54, 0x41980000
1831; GFX10-NEXT:    s_mov_b32 s53, 0x41900000
1832; GFX10-NEXT:    s_mov_b32 s52, 0x41880000
1833; GFX10-NEXT:    s_mov_b32 s51, 0x41800000
1834; GFX10-NEXT:    s_mov_b32 s50, 0x41700000
1835; GFX10-NEXT:    s_mov_b32 s49, 0x41600000
1836; GFX10-NEXT:    s_mov_b32 s48, 0x41500000
1837; GFX10-NEXT:    s_mov_b32 s47, 0x41400000
1838; GFX10-NEXT:    s_mov_b32 s46, 0x41300000
1839; GFX10-NEXT:    s_mov_b32 s45, 0x41200000
1840; GFX10-NEXT:    s_mov_b32 s44, 0x41100000
1841; GFX10-NEXT:    s_mov_b32 s43, 0x41000000
1842; GFX10-NEXT:    s_mov_b32 s42, 0x40e00000
1843; GFX10-NEXT:    s_mov_b32 s41, 0x40c00000
1844; GFX10-NEXT:    s_mov_b32 s40, 0x40a00000
1845; GFX10-NEXT:    s_mov_b32 s39, 4.0
1846; GFX10-NEXT:    s_mov_b32 s38, 0x40400000
1847; GFX10-NEXT:    s_mov_b32 s37, 2.0
1848; GFX10-NEXT:    s_movrels_b32 s0, s36
1849; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1850; GFX10-NEXT:    ; return to shader part epilog
1851entry:
1852  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
1853  ret float %ext
1854}
1855
; amdgpu_ps variant: extracts from the constant vector of doubles
; <1.0, 2.0, ..., 16.0> using an `inreg` i32 index.
; GCN and GFX10 expect the same sequence: the constants are built in
; s[36:67], with s_mov_b64 used for 1.0, 2.0 and 4.0 and the remaining
; elements written as two 32-bit halves. s66 is set to 0 once and copied into
; the even-numbered register of each of those pairs.
; m0 is loaded from the index (s2) and s_movrels_b64 reads relative to
; s[36:37] directly into s[0:1].
1856define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
1857; GCN-LABEL: dyn_extract_v16f64_s_s:
1858; GCN:       ; %bb.0: ; %entry
1859; GCN-NEXT:    s_mov_b32 s66, 0
1860; GCN-NEXT:    s_mov_b64 s[36:37], 1.0
1861; GCN-NEXT:    s_mov_b32 m0, s2
1862; GCN-NEXT:    s_mov_b32 s67, 0x40300000
1863; GCN-NEXT:    s_mov_b32 s65, 0x402e0000
1864; GCN-NEXT:    s_mov_b32 s64, s66
1865; GCN-NEXT:    s_mov_b32 s63, 0x402c0000
1866; GCN-NEXT:    s_mov_b32 s62, s66
1867; GCN-NEXT:    s_mov_b32 s61, 0x402a0000
1868; GCN-NEXT:    s_mov_b32 s60, s66
1869; GCN-NEXT:    s_mov_b32 s59, 0x40280000
1870; GCN-NEXT:    s_mov_b32 s58, s66
1871; GCN-NEXT:    s_mov_b32 s57, 0x40260000
1872; GCN-NEXT:    s_mov_b32 s56, s66
1873; GCN-NEXT:    s_mov_b32 s55, 0x40240000
1874; GCN-NEXT:    s_mov_b32 s54, s66
1875; GCN-NEXT:    s_mov_b32 s53, 0x40220000
1876; GCN-NEXT:    s_mov_b32 s52, s66
1877; GCN-NEXT:    s_mov_b32 s51, 0x40200000
1878; GCN-NEXT:    s_mov_b32 s50, s66
1879; GCN-NEXT:    s_mov_b32 s49, 0x401c0000
1880; GCN-NEXT:    s_mov_b32 s48, s66
1881; GCN-NEXT:    s_mov_b32 s47, 0x40180000
1882; GCN-NEXT:    s_mov_b32 s46, s66
1883; GCN-NEXT:    s_mov_b32 s45, 0x40140000
1884; GCN-NEXT:    s_mov_b32 s44, s66
1885; GCN-NEXT:    s_mov_b64 s[42:43], 4.0
1886; GCN-NEXT:    s_mov_b32 s41, 0x40080000
1887; GCN-NEXT:    s_mov_b32 s40, s66
1888; GCN-NEXT:    s_mov_b64 s[38:39], 2.0
1889; GCN-NEXT:    s_movrels_b64 s[0:1], s[36:37]
1890; GCN-NEXT:    ; return to shader part epilog
1891;
1892; GFX10-LABEL: dyn_extract_v16f64_s_s:
1893; GFX10:       ; %bb.0: ; %entry
1894; GFX10-NEXT:    s_mov_b32 s66, 0
1895; GFX10-NEXT:    s_mov_b64 s[36:37], 1.0
1896; GFX10-NEXT:    s_mov_b32 m0, s2
1897; GFX10-NEXT:    s_mov_b32 s67, 0x40300000
1898; GFX10-NEXT:    s_mov_b32 s65, 0x402e0000
1899; GFX10-NEXT:    s_mov_b32 s64, s66
1900; GFX10-NEXT:    s_mov_b32 s63, 0x402c0000
1901; GFX10-NEXT:    s_mov_b32 s62, s66
1902; GFX10-NEXT:    s_mov_b32 s61, 0x402a0000
1903; GFX10-NEXT:    s_mov_b32 s60, s66
1904; GFX10-NEXT:    s_mov_b32 s59, 0x40280000
1905; GFX10-NEXT:    s_mov_b32 s58, s66
1906; GFX10-NEXT:    s_mov_b32 s57, 0x40260000
1907; GFX10-NEXT:    s_mov_b32 s56, s66
1908; GFX10-NEXT:    s_mov_b32 s55, 0x40240000
1909; GFX10-NEXT:    s_mov_b32 s54, s66
1910; GFX10-NEXT:    s_mov_b32 s53, 0x40220000
1911; GFX10-NEXT:    s_mov_b32 s52, s66
1912; GFX10-NEXT:    s_mov_b32 s51, 0x40200000
1913; GFX10-NEXT:    s_mov_b32 s50, s66
1914; GFX10-NEXT:    s_mov_b32 s49, 0x401c0000
1915; GFX10-NEXT:    s_mov_b32 s48, s66
1916; GFX10-NEXT:    s_mov_b32 s47, 0x40180000
1917; GFX10-NEXT:    s_mov_b32 s46, s66
1918; GFX10-NEXT:    s_mov_b32 s45, 0x40140000
1919; GFX10-NEXT:    s_mov_b32 s44, s66
1920; GFX10-NEXT:    s_mov_b64 s[42:43], 4.0
1921; GFX10-NEXT:    s_mov_b32 s41, 0x40080000
1922; GFX10-NEXT:    s_mov_b32 s40, s66
1923; GFX10-NEXT:    s_mov_b64 s[38:39], 2.0
1924; GFX10-NEXT:    s_movrels_b64 s[0:1], s[36:37]
1925; GFX10-NEXT:    ; return to shader part epilog
1926entry:
1927  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
1928  ret double %ext
1929}
1930
; amdgpu_ps variant: returns one element of an `inreg` <6 x float> selected by
; an i32 index that is not `inreg` (the checks show it in v0).
; Both prefixes expect a compare/select chain over indices 1..5 rather than an
; indexed move. GCN first copies the scalar elements into VGPRs, whereas
; GFX10 feeds most SGPR elements straight into v_cndmask_b32_e64.
1931define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
1932; GCN-LABEL: dyn_extract_v6f32_s_v:
1933; GCN:       ; %bb.0: ; %entry
1934; GCN-NEXT:    s_mov_b32 s0, s2
1935; GCN-NEXT:    s_mov_b32 s1, s3
1936; GCN-NEXT:    v_mov_b32_e32 v1, s0
1937; GCN-NEXT:    v_mov_b32_e32 v2, s1
1938; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
1939; GCN-NEXT:    v_mov_b32_e32 v3, s4
1940; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1941; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
1942; GCN-NEXT:    v_mov_b32_e32 v4, s5
1943; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1944; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
1945; GCN-NEXT:    v_mov_b32_e32 v5, s6
1946; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1947; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
1948; GCN-NEXT:    v_mov_b32_e32 v6, s7
1949; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1950; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
1951; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v6, vcc
1952; GCN-NEXT:    ; return to shader part epilog
1953;
1954; GFX10-LABEL: dyn_extract_v6f32_s_v:
1955; GFX10:       ; %bb.0: ; %entry
1956; GFX10-NEXT:    s_mov_b32 s1, s3
1957; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
1958; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1959; GFX10-NEXT:    s_mov_b32 s0, s2
1960; GFX10-NEXT:    s_mov_b32 s2, s4
1961; GFX10-NEXT:    s_mov_b32 s3, s5
1962; GFX10-NEXT:    s_mov_b32 s4, s6
1963; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
1964; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
1965; GFX10-NEXT:    s_mov_b32 s5, s7
1966; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
1967; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
1968; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
1969; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
1970; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
1971; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
1972; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s5, vcc_lo
1973; GFX10-NEXT:    ; return to shader part epilog
1974entry:
1975  %ext = extractelement <6 x float> %vec, i32 %sel
1976  ret float %ext
1977}
1978
; Returns one element of a <6 x float> argument selected by a runtime i32
; index; neither argument is marked `inreg`.
; Both prefixes expect the index (v6) to be compared against 1..5, each
; compare followed by a v_cndmask_b32 that conditionally replaces v0 with the
; matching element (v1..v5).
1979define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
1980; GCN-LABEL: dyn_extract_v6f32_v_v:
1981; GCN:       ; %bb.0: ; %entry
1982; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1983; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
1984; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1985; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v6
1986; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1987; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
1988; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1989; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v6
1990; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1991; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v6
1992; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1993; GCN-NEXT:    s_setpc_b64 s[30:31]
1994;
1995; GFX10-LABEL: dyn_extract_v6f32_v_v:
1996; GFX10:       ; %bb.0: ; %entry
1997; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1999; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
2000; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2001; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v6
2002; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2003; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v6
2004; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2005; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v6
2006; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2007; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v6
2008; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2009; GFX10-NEXT:    s_setpc_b64 s[30:31]
2010entry:
2011  %ext = extractelement <6 x float> %vec, i32 %sel
2012  ret float %ext
2013}
2014
; amdgpu_ps variant: returns one element of a <6 x float> argument selected by
; an `inreg` i32 index (the vector itself is not `inreg`).
; Both prefixes expect a compare/select chain: the index (s2) is compared
; against 1..5 with v_cmp_eq_u32_e64, and each result drives a v_cndmask_b32
; that conditionally replaces v0 with v1..v5.
2015define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
2016; GCN-LABEL: dyn_extract_v6f32_v_s:
2017; GCN:       ; %bb.0: ; %entry
2018; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2019; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2020; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2021; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2022; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2023; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2024; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2025; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2026; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2027; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2028; GCN-NEXT:    ; return to shader part epilog
2029;
2030; GFX10-LABEL: dyn_extract_v6f32_v_s:
2031; GFX10:       ; %bb.0: ; %entry
2032; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2033; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2034; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2035; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2036; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2037; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2038; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2039; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2040; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2041; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2042; GFX10-NEXT:    ; return to shader part epilog
2043entry:
2044  %ext = extractelement <6 x float> %vec, i32 %sel
2045  ret float %ext
2046}
2047
; amdgpu_ps variant: returns one element of an `inreg` <6 x float> selected by
; an `inreg` i32 index.
; Both prefixes expect a purely scalar chain: s_cmp_eq_u32 of the index (s8)
; against 1..5, each followed by s_cselect_b32 accumulating into s0 (starting
; from s2 and choosing among s3..s7). The result is then copied to v0.
2048define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
2049; GCN-LABEL: dyn_extract_v6f32_s_s:
2050; GCN:       ; %bb.0: ; %entry
2051; GCN-NEXT:    s_cmp_eq_u32 s8, 1
2052; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2053; GCN-NEXT:    s_cmp_eq_u32 s8, 2
2054; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2055; GCN-NEXT:    s_cmp_eq_u32 s8, 3
2056; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2057; GCN-NEXT:    s_cmp_eq_u32 s8, 4
2058; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2059; GCN-NEXT:    s_cmp_eq_u32 s8, 5
2060; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2061; GCN-NEXT:    v_mov_b32_e32 v0, s0
2062; GCN-NEXT:    ; return to shader part epilog
2063;
2064; GFX10-LABEL: dyn_extract_v6f32_s_s:
2065; GFX10:       ; %bb.0: ; %entry
2066; GFX10-NEXT:    s_cmp_eq_u32 s8, 1
2067; GFX10-NEXT:    s_cselect_b32 s0, s3, s2
2068; GFX10-NEXT:    s_cmp_eq_u32 s8, 2
2069; GFX10-NEXT:    s_cselect_b32 s0, s4, s0
2070; GFX10-NEXT:    s_cmp_eq_u32 s8, 3
2071; GFX10-NEXT:    s_cselect_b32 s0, s5, s0
2072; GFX10-NEXT:    s_cmp_eq_u32 s8, 4
2073; GFX10-NEXT:    s_cselect_b32 s0, s6, s0
2074; GFX10-NEXT:    s_cmp_eq_u32 s8, 5
2075; GFX10-NEXT:    s_cselect_b32 s0, s7, s0
2076; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2077; GFX10-NEXT:    ; return to shader part epilog
2078entry:
2079  %ext = extractelement <6 x float> %vec, i32 %sel
2080  ret float %ext
2081}
2082
; amdgpu_ps variant: returns one element of an `inreg` <7 x float> selected by
; an i32 index that is not `inreg` (the checks show it in v0).
; Both prefixes expect a compare/select chain over indices 1..6. GCN first
; copies the scalar elements into VGPRs, whereas GFX10 feeds most SGPR
; elements straight into v_cndmask_b32_e64.
2083define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
2084; GCN-LABEL: dyn_extract_v7f32_s_v:
2085; GCN:       ; %bb.0: ; %entry
2086; GCN-NEXT:    s_mov_b32 s0, s2
2087; GCN-NEXT:    s_mov_b32 s1, s3
2088; GCN-NEXT:    s_mov_b32 s2, s4
2089; GCN-NEXT:    v_mov_b32_e32 v1, s0
2090; GCN-NEXT:    v_mov_b32_e32 v2, s1
2091; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2092; GCN-NEXT:    v_mov_b32_e32 v3, s2
2093; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2094; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2095; GCN-NEXT:    v_mov_b32_e32 v4, s5
2096; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2097; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2098; GCN-NEXT:    v_mov_b32_e32 v5, s6
2099; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2100; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2101; GCN-NEXT:    v_mov_b32_e32 v6, s7
2102; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2103; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2104; GCN-NEXT:    v_mov_b32_e32 v7, s8
2105; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
2106; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2107; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v7, vcc
2108; GCN-NEXT:    ; return to shader part epilog
2109;
2110; GFX10-LABEL: dyn_extract_v7f32_s_v:
2111; GFX10:       ; %bb.0: ; %entry
2112; GFX10-NEXT:    s_mov_b32 s1, s3
2113; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2114; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2115; GFX10-NEXT:    s_mov_b32 s0, s2
2116; GFX10-NEXT:    s_mov_b32 s2, s4
2117; GFX10-NEXT:    s_mov_b32 s3, s5
2118; GFX10-NEXT:    s_mov_b32 s4, s6
2119; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2120; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2121; GFX10-NEXT:    s_mov_b32 s5, s7
2122; GFX10-NEXT:    s_mov_b32 s6, s8
2123; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
2124; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2125; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
2126; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2127; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2128; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2129; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2130; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2131; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s6, vcc_lo
2132; GFX10-NEXT:    ; return to shader part epilog
2133entry:
2134  %ext = extractelement <7 x float> %vec, i32 %sel
2135  ret float %ext
2136}
2137
; Returns one element of a <7 x float> argument selected by a runtime i32
; index; neither argument is marked `inreg`.
; Both prefixes expect the index (v7) to be compared against 1..6, each
; compare followed by a v_cndmask_b32 that conditionally replaces v0 with the
; matching element (v1..v6).
2138define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
2139; GCN-LABEL: dyn_extract_v7f32_v_v:
2140; GCN:       ; %bb.0: ; %entry
2141; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2142; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
2143; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2144; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v7
2145; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2146; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
2147; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2148; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v7
2149; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2150; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v7
2151; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2152; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v7
2153; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2154; GCN-NEXT:    s_setpc_b64 s[30:31]
2155;
2156; GFX10-LABEL: dyn_extract_v7f32_v_v:
2157; GFX10:       ; %bb.0: ; %entry
2158; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2159; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2160; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
2161; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2162; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v7
2163; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2164; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v7
2165; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2166; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v7
2167; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2168; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v7
2169; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2170; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v7
2171; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2172; GFX10-NEXT:    s_setpc_b64 s[30:31]
2173entry:
2174  %ext = extractelement <7 x float> %vec, i32 %sel
2175  ret float %ext
2176}
2177
; amdgpu_ps variant: returns one element of a <7 x float> argument selected by
; an `inreg` i32 index (the vector itself is not `inreg`).
; Both prefixes expect a compare/select chain: the index (s2) is compared
; against 1..6 with v_cmp_eq_u32_e64, and each result drives a v_cndmask_b32
; that conditionally replaces v0 with v1..v6.
2178define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
2179; GCN-LABEL: dyn_extract_v7f32_v_s:
2180; GCN:       ; %bb.0: ; %entry
2181; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2182; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2183; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2184; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2185; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2186; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2187; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2188; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2189; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2190; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2191; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
2192; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2193; GCN-NEXT:    ; return to shader part epilog
2194;
2195; GFX10-LABEL: dyn_extract_v7f32_v_s:
2196; GFX10:       ; %bb.0: ; %entry
2197; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2198; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2199; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2200; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2201; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2202; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2203; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2204; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2205; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2206; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2207; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
2208; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2209; GFX10-NEXT:    ; return to shader part epilog
2210entry:
2211  %ext = extractelement <7 x float> %vec, i32 %sel
2212  ret float %ext
2213}
2214
; amdgpu_ps variant: returns one element of an `inreg` <7 x float> selected by
; an `inreg` i32 index.
; Both prefixes expect a purely scalar chain: s_cmp_eq_u32 of the index (s9)
; against 1..6, each followed by s_cselect_b32 accumulating into s0 (starting
; from s2 and choosing among s3..s8). The result is then copied to v0.
2215define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
2216; GCN-LABEL: dyn_extract_v7f32_s_s:
2217; GCN:       ; %bb.0: ; %entry
2218; GCN-NEXT:    s_cmp_eq_u32 s9, 1
2219; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2220; GCN-NEXT:    s_cmp_eq_u32 s9, 2
2221; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2222; GCN-NEXT:    s_cmp_eq_u32 s9, 3
2223; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2224; GCN-NEXT:    s_cmp_eq_u32 s9, 4
2225; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2226; GCN-NEXT:    s_cmp_eq_u32 s9, 5
2227; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2228; GCN-NEXT:    s_cmp_eq_u32 s9, 6
2229; GCN-NEXT:    s_cselect_b32 s0, s8, s0
2230; GCN-NEXT:    v_mov_b32_e32 v0, s0
2231; GCN-NEXT:    ; return to shader part epilog
2232;
2233; GFX10-LABEL: dyn_extract_v7f32_s_s:
2234; GFX10:       ; %bb.0: ; %entry
2235; GFX10-NEXT:    s_cmp_eq_u32 s9, 1
2236; GFX10-NEXT:    s_cselect_b32 s0, s3, s2
2237; GFX10-NEXT:    s_cmp_eq_u32 s9, 2
2238; GFX10-NEXT:    s_cselect_b32 s0, s4, s0
2239; GFX10-NEXT:    s_cmp_eq_u32 s9, 3
2240; GFX10-NEXT:    s_cselect_b32 s0, s5, s0
2241; GFX10-NEXT:    s_cmp_eq_u32 s9, 4
2242; GFX10-NEXT:    s_cselect_b32 s0, s6, s0
2243; GFX10-NEXT:    s_cmp_eq_u32 s9, 5
2244; GFX10-NEXT:    s_cselect_b32 s0, s7, s0
2245; GFX10-NEXT:    s_cmp_eq_u32 s9, 6
2246; GFX10-NEXT:    s_cselect_b32 s0, s8, s0
2247; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2248; GFX10-NEXT:    ; return to shader part epilog
2249entry:
2250  %ext = extractelement <7 x float> %vec, i32 %sel
2251  ret float %ext
2252}
2253
; Dynamic extractelement from a <6 x double> in an amdgpu_ps function where
; the vector is `inreg` and the index is a plain (non-inreg) argument.
; The expected code compares the index in v0 against 1..5 with v_cmp_eq_u32
; and, after each compare, issues two v_cndmask_b32 (one per 32-bit half of
; the double). The selected pair is moved to s0/s1 with v_readfirstlane_b32.
; Under the GCN prefix every SGPR input is first copied to a VGPR with
; v_mov_b32; under the GFX10 prefix only element 1 is copied and the other
; elements appear directly as SGPR operands of v_cndmask_b32.
2254define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) {
2255; GCN-LABEL: dyn_extract_v6f64_s_v:
2256; GCN:       ; %bb.0: ; %entry
2257; GCN-NEXT:    s_mov_b32 s0, s2
2258; GCN-NEXT:    s_mov_b32 s1, s3
2259; GCN-NEXT:    s_mov_b32 s2, s4
2260; GCN-NEXT:    s_mov_b32 s3, s5
2261; GCN-NEXT:    s_mov_b32 s4, s6
2262; GCN-NEXT:    s_mov_b32 s5, s7
2263; GCN-NEXT:    v_mov_b32_e32 v1, s0
2264; GCN-NEXT:    v_mov_b32_e32 v2, s1
2265; GCN-NEXT:    v_mov_b32_e32 v3, s2
2266; GCN-NEXT:    v_mov_b32_e32 v4, s3
2267; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2268; GCN-NEXT:    s_mov_b32 s6, s8
2269; GCN-NEXT:    s_mov_b32 s7, s9
2270; GCN-NEXT:    v_mov_b32_e32 v5, s4
2271; GCN-NEXT:    v_mov_b32_e32 v6, s5
2272; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2273; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2274; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2275; GCN-NEXT:    v_mov_b32_e32 v7, s6
2276; GCN-NEXT:    v_mov_b32_e32 v8, s7
2277; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2278; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2279; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2280; GCN-NEXT:    v_mov_b32_e32 v9, s10
2281; GCN-NEXT:    v_mov_b32_e32 v10, s11
2282; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2283; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2284; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2285; GCN-NEXT:    v_mov_b32_e32 v11, s12
2286; GCN-NEXT:    v_mov_b32_e32 v12, s13
2287; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2288; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2289; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2290; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v11, vcc
2291; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v12, vcc
2292; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2293; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2294; GCN-NEXT:    ; return to shader part epilog
2295;
2296; GFX10-LABEL: dyn_extract_v6f64_s_v:
2297; GFX10:       ; %bb.0: ; %entry
2298; GFX10-NEXT:    s_mov_b32 s0, s2
2299; GFX10-NEXT:    s_mov_b32 s2, s4
2300; GFX10-NEXT:    s_mov_b32 s15, s5
2301; GFX10-NEXT:    v_mov_b32_e32 v1, s2
2302; GFX10-NEXT:    v_mov_b32_e32 v2, s15
2303; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2304; GFX10-NEXT:    s_mov_b32 s1, s3
2305; GFX10-NEXT:    s_mov_b32 s4, s6
2306; GFX10-NEXT:    s_mov_b32 s5, s7
2307; GFX10-NEXT:    s_mov_b32 s6, s8
2308; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2309; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2310; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2311; GFX10-NEXT:    s_mov_b32 s7, s9
2312; GFX10-NEXT:    s_mov_b32 s8, s10
2313; GFX10-NEXT:    s_mov_b32 s9, s11
2314; GFX10-NEXT:    s_mov_b32 s10, s12
2315; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2316; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2317; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2318; GFX10-NEXT:    s_mov_b32 s11, s13
2319; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2320; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2321; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2322; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2323; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2324; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2325; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s10, vcc_lo
2326; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s11, vcc_lo
2327; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2328; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2329; GFX10-NEXT:    ; return to shader part epilog
2330entry:
2331  %ext = extractelement <6 x double> %vec, i32 %sel
2332  ret double %ext
2333}
2334
; Dynamic extractelement from a <6 x double> in a function with the default
; calling convention; neither the vector nor the index is `inreg`.
; The checks expect the vector in v0..v11 and the index in v12. The index is
; compared against 1..5 with v_cmp_eq_u32, and each compare is followed by two
; v_cndmask_b32 that update the result pair v0/v1. The function returns with
; s_setpc_b64 s[30:31]. The GFX10 prefix additionally expects an
; s_waitcnt_vscnt after the entry s_waitcnt.
2335define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) {
2336; GCN-LABEL: dyn_extract_v6f64_v_v:
2337; GCN:       ; %bb.0: ; %entry
2338; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2339; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v12
2340; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2341; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2342; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v12
2343; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2344; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2345; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
2346; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2347; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2348; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v12
2349; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2350; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2351; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v12
2352; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2353; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2354; GCN-NEXT:    s_setpc_b64 s[30:31]
2355;
2356; GFX10-LABEL: dyn_extract_v6f64_v_v:
2357; GFX10:       ; %bb.0: ; %entry
2358; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2360; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
2361; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2362; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2363; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
2364; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2365; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2366; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
2367; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2368; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2369; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
2370; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2371; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2372; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v12
2373; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2374; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2375; GFX10-NEXT:    s_setpc_b64 s[30:31]
2376entry:
2377  %ext = extractelement <6 x double> %vec, i32 %sel
2378  ret double %ext
2379}
2380
; Dynamic extractelement from a <6 x double> in an amdgpu_ps function where
; the vector is a plain argument and only the index is `inreg`.
; All three prefixes expect an indexed register read instead of a compare
; chain. The index in s2 is shifted left by one first (presumably because each
; double spans two 32-bit registers -- confirm), then:
;   * GPRIDX (gfx900 run line) brackets two v_mov_b32 with
;     s_set_gpr_idx_on / s_set_gpr_idx_off;
;   * MOVREL (fiji run line) and GFX10 write the shifted index to m0 and use
;     two v_movrels_b32.
; The two result halves are moved to s0/s1 with v_readfirstlane_b32.
2381define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) {
2382; GPRIDX-LABEL: dyn_extract_v6f64_v_s:
2383; GPRIDX:       ; %bb.0: ; %entry
2384; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2385; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2386; GPRIDX-NEXT:    v_mov_b32_e32 v12, v0
2387; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2388; GPRIDX-NEXT:    s_set_gpr_idx_off
2389; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v12
2390; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2391; GPRIDX-NEXT:    ; return to shader part epilog
2392;
2393; MOVREL-LABEL: dyn_extract_v6f64_v_s:
2394; MOVREL:       ; %bb.0: ; %entry
2395; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2396; MOVREL-NEXT:    v_movrels_b32_e32 v12, v0
2397; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2398; MOVREL-NEXT:    v_readfirstlane_b32 s0, v12
2399; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2400; MOVREL-NEXT:    ; return to shader part epilog
2401;
2402; GFX10-LABEL: dyn_extract_v6f64_v_s:
2403; GFX10:       ; %bb.0: ; %entry
2404; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
2405; GFX10-NEXT:    v_movrels_b32_e32 v12, v0
2406; GFX10-NEXT:    v_movrels_b32_e32 v0, v1
2407; GFX10-NEXT:    v_readfirstlane_b32 s0, v12
2408; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
2409; GFX10-NEXT:    ; return to shader part epilog
2410entry:
2411  %ext = extractelement <6 x double> %vec, i32 %sel
2412  ret double %ext
2413}
2414
; Dynamic extractelement from a <6 x double> in an amdgpu_ps function where
; both the vector and the index are `inreg` arguments.
; Both check prefixes expect the same sequence: the twelve input dwords are
; copied down from s2..s13 to s0..s11 with s_mov_b32, the index (s14) is
; written to m0, and a single s_movrels_b64 s[0:1], s[0:1] performs the
; indexed read, leaving the result in s[0:1].
2415define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) {
2416; GCN-LABEL: dyn_extract_v6f64_s_s:
2417; GCN:       ; %bb.0: ; %entry
2418; GCN-NEXT:    s_mov_b32 s0, s2
2419; GCN-NEXT:    s_mov_b32 s1, s3
2420; GCN-NEXT:    s_mov_b32 m0, s14
2421; GCN-NEXT:    s_mov_b32 s2, s4
2422; GCN-NEXT:    s_mov_b32 s3, s5
2423; GCN-NEXT:    s_mov_b32 s4, s6
2424; GCN-NEXT:    s_mov_b32 s5, s7
2425; GCN-NEXT:    s_mov_b32 s6, s8
2426; GCN-NEXT:    s_mov_b32 s7, s9
2427; GCN-NEXT:    s_mov_b32 s8, s10
2428; GCN-NEXT:    s_mov_b32 s9, s11
2429; GCN-NEXT:    s_mov_b32 s10, s12
2430; GCN-NEXT:    s_mov_b32 s11, s13
2431; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2432; GCN-NEXT:    ; return to shader part epilog
2433;
2434; GFX10-LABEL: dyn_extract_v6f64_s_s:
2435; GFX10:       ; %bb.0: ; %entry
2436; GFX10-NEXT:    s_mov_b32 s0, s2
2437; GFX10-NEXT:    s_mov_b32 s1, s3
2438; GFX10-NEXT:    s_mov_b32 m0, s14
2439; GFX10-NEXT:    s_mov_b32 s2, s4
2440; GFX10-NEXT:    s_mov_b32 s3, s5
2441; GFX10-NEXT:    s_mov_b32 s4, s6
2442; GFX10-NEXT:    s_mov_b32 s5, s7
2443; GFX10-NEXT:    s_mov_b32 s6, s8
2444; GFX10-NEXT:    s_mov_b32 s7, s9
2445; GFX10-NEXT:    s_mov_b32 s8, s10
2446; GFX10-NEXT:    s_mov_b32 s9, s11
2447; GFX10-NEXT:    s_mov_b32 s10, s12
2448; GFX10-NEXT:    s_mov_b32 s11, s13
2449; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2450; GFX10-NEXT:    ; return to shader part epilog
2451entry:
2452  %ext = extractelement <6 x double> %vec, i32 %sel
2453  ret double %ext
2454}
2455
; Dynamic extractelement from a <7 x double> in an amdgpu_ps function where
; the vector is `inreg` and the index is a plain (non-inreg) argument.
; Seven-element counterpart of the <6 x double> case: the index in v0 is
; compared against 1..6 with v_cmp_eq_u32, each compare drives two
; v_cndmask_b32 (one per 32-bit half of the double), and the selected pair is
; moved to s0/s1 with v_readfirstlane_b32.
; Under the GCN prefix every SGPR input is first copied to a VGPR with
; v_mov_b32; under the GFX10 prefix only element 1 is copied and the other
; elements appear directly as SGPR operands of v_cndmask_b32.
2456define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) {
2457; GCN-LABEL: dyn_extract_v7f64_s_v:
2458; GCN:       ; %bb.0: ; %entry
2459; GCN-NEXT:    s_mov_b32 s0, s2
2460; GCN-NEXT:    s_mov_b32 s1, s3
2461; GCN-NEXT:    s_mov_b32 s2, s4
2462; GCN-NEXT:    s_mov_b32 s3, s5
2463; GCN-NEXT:    s_mov_b32 s4, s6
2464; GCN-NEXT:    s_mov_b32 s5, s7
2465; GCN-NEXT:    v_mov_b32_e32 v1, s0
2466; GCN-NEXT:    v_mov_b32_e32 v2, s1
2467; GCN-NEXT:    v_mov_b32_e32 v3, s2
2468; GCN-NEXT:    v_mov_b32_e32 v4, s3
2469; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2470; GCN-NEXT:    s_mov_b32 s6, s8
2471; GCN-NEXT:    s_mov_b32 s7, s9
2472; GCN-NEXT:    v_mov_b32_e32 v5, s4
2473; GCN-NEXT:    v_mov_b32_e32 v6, s5
2474; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2475; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2476; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2477; GCN-NEXT:    s_mov_b32 s8, s10
2478; GCN-NEXT:    s_mov_b32 s9, s11
2479; GCN-NEXT:    v_mov_b32_e32 v7, s6
2480; GCN-NEXT:    v_mov_b32_e32 v8, s7
2481; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2482; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2483; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2484; GCN-NEXT:    v_mov_b32_e32 v9, s8
2485; GCN-NEXT:    v_mov_b32_e32 v10, s9
2486; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2487; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2488; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2489; GCN-NEXT:    v_mov_b32_e32 v11, s12
2490; GCN-NEXT:    v_mov_b32_e32 v12, s13
2491; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2492; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2493; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2494; GCN-NEXT:    v_mov_b32_e32 v13, s14
2495; GCN-NEXT:    v_mov_b32_e32 v14, s15
2496; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2497; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
2498; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2499; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v13, vcc
2500; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v14, vcc
2501; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2502; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2503; GCN-NEXT:    ; return to shader part epilog
2504;
2505; GFX10-LABEL: dyn_extract_v7f64_s_v:
2506; GFX10:       ; %bb.0: ; %entry
2507; GFX10-NEXT:    s_mov_b32 s0, s2
2508; GFX10-NEXT:    s_mov_b32 s2, s4
2509; GFX10-NEXT:    s_mov_b32 s19, s5
2510; GFX10-NEXT:    v_mov_b32_e32 v1, s2
2511; GFX10-NEXT:    v_mov_b32_e32 v2, s19
2512; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2513; GFX10-NEXT:    s_mov_b32 s1, s3
2514; GFX10-NEXT:    s_mov_b32 s4, s6
2515; GFX10-NEXT:    s_mov_b32 s5, s7
2516; GFX10-NEXT:    s_mov_b32 s6, s8
2517; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2518; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2519; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2520; GFX10-NEXT:    s_mov_b32 s7, s9
2521; GFX10-NEXT:    s_mov_b32 s8, s10
2522; GFX10-NEXT:    s_mov_b32 s9, s11
2523; GFX10-NEXT:    s_mov_b32 s10, s12
2524; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2525; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2526; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2527; GFX10-NEXT:    s_mov_b32 s11, s13
2528; GFX10-NEXT:    s_mov_b32 s12, s14
2529; GFX10-NEXT:    s_mov_b32 s13, s15
2530; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2531; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2532; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2533; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2534; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2535; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2536; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2537; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2538; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2539; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2540; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2541; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2542; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2543; GFX10-NEXT:    ; return to shader part epilog
2544entry:
2545  %ext = extractelement <7 x double> %vec, i32 %sel
2546  ret double %ext
2547}
2548
; Dynamic extractelement from a <7 x double> in a function with the default
; calling convention; neither the vector nor the index is `inreg`.
; The checks expect the vector in v0..v13 and the index in v14. The index is
; compared against 1..6 with v_cmp_eq_u32, and each compare is followed by two
; v_cndmask_b32 that update the result pair v0/v1. The function returns with
; s_setpc_b64 s[30:31]. The GFX10 prefix additionally expects an
; s_waitcnt_vscnt after the entry s_waitcnt.
2549define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
2550; GCN-LABEL: dyn_extract_v7f64_v_v:
2551; GCN:       ; %bb.0: ; %entry
2552; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2553; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v14
2554; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2555; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2556; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v14
2557; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2558; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2559; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v14
2560; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2561; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2562; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v14
2563; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2564; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2565; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v14
2566; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2567; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2568; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v14
2569; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
2570; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
2571; GCN-NEXT:    s_setpc_b64 s[30:31]
2572;
2573; GFX10-LABEL: dyn_extract_v7f64_v_v:
2574; GFX10:       ; %bb.0: ; %entry
2575; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2576; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2577; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
2578; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2579; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2580; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v14
2581; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2582; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2583; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v14
2584; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2585; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2586; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v14
2587; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2588; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2589; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v14
2590; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2591; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2592; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v14
2593; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
2594; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
2595; GFX10-NEXT:    s_setpc_b64 s[30:31]
2596entry:
2597  %ext = extractelement <7 x double> %vec, i32 %sel
2598  ret double %ext
2599}
2600
; Dynamic extractelement from a <7 x double> in an amdgpu_ps function where
; the vector is a plain argument and only the index is `inreg`.
; All three prefixes expect an indexed register read instead of a compare
; chain. The index in s2 is shifted left by one first (presumably because each
; double spans two 32-bit registers -- confirm), then:
;   * GPRIDX (gfx900 run line) brackets two v_mov_b32 with
;     s_set_gpr_idx_on / s_set_gpr_idx_off;
;   * MOVREL (fiji run line) and GFX10 write the shifted index to m0 and use
;     two v_movrels_b32.
; The first half is read into v14, the second into v0, and both are then
; moved to s0/s1 with v_readfirstlane_b32.
2601define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) {
2602; GPRIDX-LABEL: dyn_extract_v7f64_v_s:
2603; GPRIDX:       ; %bb.0: ; %entry
2604; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2605; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2606; GPRIDX-NEXT:    v_mov_b32_e32 v14, v0
2607; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2608; GPRIDX-NEXT:    s_set_gpr_idx_off
2609; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v14
2610; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2611; GPRIDX-NEXT:    ; return to shader part epilog
2612;
2613; MOVREL-LABEL: dyn_extract_v7f64_v_s:
2614; MOVREL:       ; %bb.0: ; %entry
2615; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2616; MOVREL-NEXT:    v_movrels_b32_e32 v14, v0
2617; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2618; MOVREL-NEXT:    v_readfirstlane_b32 s0, v14
2619; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2620; MOVREL-NEXT:    ; return to shader part epilog
2621;
2622; GFX10-LABEL: dyn_extract_v7f64_v_s:
2623; GFX10:       ; %bb.0: ; %entry
2624; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
2625; GFX10-NEXT:    v_movrels_b32_e32 v14, v0
2626; GFX10-NEXT:    v_movrels_b32_e32 v0, v1
2627; GFX10-NEXT:    v_readfirstlane_b32 s0, v14
2628; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
2629; GFX10-NEXT:    ; return to shader part epilog
2630entry:
2631  %ext = extractelement <7 x double> %vec, i32 %sel
2632  ret double %ext
2633}
2634
; Dynamic extractelement from a <7 x double> in an amdgpu_ps function where
; both the vector and the index are `inreg` arguments.
; Both check prefixes expect the same sequence: the fourteen input dwords are
; copied down from s2..s15 to s0..s13 with s_mov_b32, the index (s16) is
; written to m0, and a single s_movrels_b64 s[0:1], s[0:1] performs the
; indexed read, leaving the result in s[0:1].
2635define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) {
2636; GCN-LABEL: dyn_extract_v7f64_s_s:
2637; GCN:       ; %bb.0: ; %entry
2638; GCN-NEXT:    s_mov_b32 s0, s2
2639; GCN-NEXT:    s_mov_b32 s1, s3
2640; GCN-NEXT:    s_mov_b32 m0, s16
2641; GCN-NEXT:    s_mov_b32 s2, s4
2642; GCN-NEXT:    s_mov_b32 s3, s5
2643; GCN-NEXT:    s_mov_b32 s4, s6
2644; GCN-NEXT:    s_mov_b32 s5, s7
2645; GCN-NEXT:    s_mov_b32 s6, s8
2646; GCN-NEXT:    s_mov_b32 s7, s9
2647; GCN-NEXT:    s_mov_b32 s8, s10
2648; GCN-NEXT:    s_mov_b32 s9, s11
2649; GCN-NEXT:    s_mov_b32 s10, s12
2650; GCN-NEXT:    s_mov_b32 s11, s13
2651; GCN-NEXT:    s_mov_b32 s12, s14
2652; GCN-NEXT:    s_mov_b32 s13, s15
2653; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2654; GCN-NEXT:    ; return to shader part epilog
2655;
2656; GFX10-LABEL: dyn_extract_v7f64_s_s:
2657; GFX10:       ; %bb.0: ; %entry
2658; GFX10-NEXT:    s_mov_b32 s0, s2
2659; GFX10-NEXT:    s_mov_b32 s1, s3
2660; GFX10-NEXT:    s_mov_b32 m0, s16
2661; GFX10-NEXT:    s_mov_b32 s2, s4
2662; GFX10-NEXT:    s_mov_b32 s3, s5
2663; GFX10-NEXT:    s_mov_b32 s4, s6
2664; GFX10-NEXT:    s_mov_b32 s5, s7
2665; GFX10-NEXT:    s_mov_b32 s6, s8
2666; GFX10-NEXT:    s_mov_b32 s7, s9
2667; GFX10-NEXT:    s_mov_b32 s8, s10
2668; GFX10-NEXT:    s_mov_b32 s9, s11
2669; GFX10-NEXT:    s_mov_b32 s10, s12
2670; GFX10-NEXT:    s_mov_b32 s11, s13
2671; GFX10-NEXT:    s_mov_b32 s12, s14
2672; GFX10-NEXT:    s_mov_b32 s13, s15
2673; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2674; GFX10-NEXT:    ; return to shader part epilog
2675entry:
2676  %ext = extractelement <7 x double> %vec, i32 %sel
2677  ret double %ext
2678}
2679
; Dynamic extractelement from a constant <5 x double> (1.0 .. 5.0) inside an
; amdgpu_kernel; the selected element is stored to the addrspace(1) pointer
; %out.
; Because this is a kernel, each prefix's checks also pin the complete
; .amd_kernel_code_t block emitted ahead of the code, so any change to kernel
; metadata shows up here as well.
; In the expected code the %out pointer is loaded from kernarg offset 0x0 and
; the index from offset 0x8. The index is compared against 1..4 with
; s_cmp_eq_u32, and a chain of s_cselect_b64 picks the element: 1.0, 2.0 and
; 4.0 appear as inline operands, while 3.0 and 5.0 are built in SGPR pairs
; from a zero low dword and high dwords 0x40080000 / 0x40140000.
; The store is global_store_dwordx2 under GPRIDX and GFX10 and
; flat_store_dwordx2 under MOVREL.
2680define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) {
2681; GPRIDX-LABEL: dyn_extract_v5f64_s_s:
2682; GPRIDX:         .amd_kernel_code_t
2683; GPRIDX-NEXT:     amd_code_version_major = 1
2684; GPRIDX-NEXT:     amd_code_version_minor = 2
2685; GPRIDX-NEXT:     amd_machine_kind = 1
2686; GPRIDX-NEXT:     amd_machine_version_major = 9
2687; GPRIDX-NEXT:     amd_machine_version_minor = 0
2688; GPRIDX-NEXT:     amd_machine_version_stepping = 0
2689; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
2690; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
2691; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
2692; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 1
2693; GPRIDX-NEXT:     priority = 0
2694; GPRIDX-NEXT:     float_mode = 240
2695; GPRIDX-NEXT:     priv = 0
2696; GPRIDX-NEXT:     enable_dx10_clamp = 1
2697; GPRIDX-NEXT:     debug_mode = 0
2698; GPRIDX-NEXT:     enable_ieee_mode = 1
2699; GPRIDX-NEXT:     enable_wgp_mode = 0
2700; GPRIDX-NEXT:     enable_mem_ordered = 0
2701; GPRIDX-NEXT:     enable_fwd_progress = 0
2702; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
2703; GPRIDX-NEXT:     user_sgpr_count = 6
2704; GPRIDX-NEXT:     enable_trap_handler = 0
2705; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
2706; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
2707; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
2708; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
2709; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
2710; GPRIDX-NEXT:     enable_exception_msb = 0
2711; GPRIDX-NEXT:     granulated_lds_size = 0
2712; GPRIDX-NEXT:     enable_exception = 0
2713; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
2714; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
2715; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
2716; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
2717; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
2718; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
2719; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
2720; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
2721; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
2722; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
2723; GPRIDX-NEXT:     enable_wavefront_size32 = 0
2724; GPRIDX-NEXT:     enable_ordered_append_gds = 0
2725; GPRIDX-NEXT:     private_element_size = 1
2726; GPRIDX-NEXT:     is_ptr64 = 1
2727; GPRIDX-NEXT:     is_dynamic_callstack = 0
2728; GPRIDX-NEXT:     is_debug_enabled = 0
2729; GPRIDX-NEXT:     is_xnack_enabled = 1
2730; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
2731; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
2732; GPRIDX-NEXT:     gds_segment_byte_size = 0
2733; GPRIDX-NEXT:     kernarg_segment_byte_size = 28
2734; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
2735; GPRIDX-NEXT:     wavefront_sgpr_count = 9
2736; GPRIDX-NEXT:     workitem_vgpr_count = 3
2737; GPRIDX-NEXT:     reserved_vgpr_first = 0
2738; GPRIDX-NEXT:     reserved_vgpr_count = 0
2739; GPRIDX-NEXT:     reserved_sgpr_first = 0
2740; GPRIDX-NEXT:     reserved_sgpr_count = 0
2741; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
2742; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
2743; GPRIDX-NEXT:     kernarg_segment_alignment = 4
2744; GPRIDX-NEXT:     group_segment_alignment = 4
2745; GPRIDX-NEXT:     private_segment_alignment = 4
2746; GPRIDX-NEXT:     wavefront_size = 6
2747; GPRIDX-NEXT:     call_convention = -1
2748; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
2749; GPRIDX-NEXT:    .end_amd_kernel_code_t
2750; GPRIDX-NEXT:  ; %bb.0: ; %entry
2751; GPRIDX-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
2752; GPRIDX-NEXT:    s_load_dword s8, s[4:5], 0x8
2753; GPRIDX-NEXT:    s_mov_b32 s0, 0
2754; GPRIDX-NEXT:    s_mov_b32 s1, 0x40140000
2755; GPRIDX-NEXT:    s_mov_b32 s3, 0x40080000
2756; GPRIDX-NEXT:    s_mov_b32 s2, s0
2757; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
2758; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 1
2759; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
2760; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 2
2761; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
2762; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 3
2763; GPRIDX-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
2764; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 4
2765; GPRIDX-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
2766; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2767; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2768; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
2769; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
2770; GPRIDX-NEXT:    s_endpgm
2771;
2772; MOVREL-LABEL: dyn_extract_v5f64_s_s:
2773; MOVREL:         .amd_kernel_code_t
2774; MOVREL-NEXT:     amd_code_version_major = 1
2775; MOVREL-NEXT:     amd_code_version_minor = 2
2776; MOVREL-NEXT:     amd_machine_kind = 1
2777; MOVREL-NEXT:     amd_machine_version_major = 8
2778; MOVREL-NEXT:     amd_machine_version_minor = 0
2779; MOVREL-NEXT:     amd_machine_version_stepping = 3
2780; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
2781; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
2782; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
2783; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 1
2784; MOVREL-NEXT:     priority = 0
2785; MOVREL-NEXT:     float_mode = 240
2786; MOVREL-NEXT:     priv = 0
2787; MOVREL-NEXT:     enable_dx10_clamp = 1
2788; MOVREL-NEXT:     debug_mode = 0
2789; MOVREL-NEXT:     enable_ieee_mode = 1
2790; MOVREL-NEXT:     enable_wgp_mode = 0
2791; MOVREL-NEXT:     enable_mem_ordered = 0
2792; MOVREL-NEXT:     enable_fwd_progress = 0
2793; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
2794; MOVREL-NEXT:     user_sgpr_count = 6
2795; MOVREL-NEXT:     enable_trap_handler = 0
2796; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
2797; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
2798; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
2799; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
2800; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
2801; MOVREL-NEXT:     enable_exception_msb = 0
2802; MOVREL-NEXT:     granulated_lds_size = 0
2803; MOVREL-NEXT:     enable_exception = 0
2804; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
2805; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
2806; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
2807; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
2808; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
2809; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
2810; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
2811; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
2812; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
2813; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
2814; MOVREL-NEXT:     enable_wavefront_size32 = 0
2815; MOVREL-NEXT:     enable_ordered_append_gds = 0
2816; MOVREL-NEXT:     private_element_size = 1
2817; MOVREL-NEXT:     is_ptr64 = 1
2818; MOVREL-NEXT:     is_dynamic_callstack = 0
2819; MOVREL-NEXT:     is_debug_enabled = 0
2820; MOVREL-NEXT:     is_xnack_enabled = 0
2821; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
2822; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
2823; MOVREL-NEXT:     gds_segment_byte_size = 0
2824; MOVREL-NEXT:     kernarg_segment_byte_size = 28
2825; MOVREL-NEXT:     workgroup_fbarrier_count = 0
2826; MOVREL-NEXT:     wavefront_sgpr_count = 9
2827; MOVREL-NEXT:     workitem_vgpr_count = 4
2828; MOVREL-NEXT:     reserved_vgpr_first = 0
2829; MOVREL-NEXT:     reserved_vgpr_count = 0
2830; MOVREL-NEXT:     reserved_sgpr_first = 0
2831; MOVREL-NEXT:     reserved_sgpr_count = 0
2832; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
2833; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
2834; MOVREL-NEXT:     kernarg_segment_alignment = 4
2835; MOVREL-NEXT:     group_segment_alignment = 4
2836; MOVREL-NEXT:     private_segment_alignment = 4
2837; MOVREL-NEXT:     wavefront_size = 6
2838; MOVREL-NEXT:     call_convention = -1
2839; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
2840; MOVREL-NEXT:    .end_amd_kernel_code_t
2841; MOVREL-NEXT:  ; %bb.0: ; %entry
2842; MOVREL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
2843; MOVREL-NEXT:    s_load_dword s8, s[4:5], 0x8
2844; MOVREL-NEXT:    s_mov_b32 s0, 0
2845; MOVREL-NEXT:    s_mov_b32 s1, 0x40140000
2846; MOVREL-NEXT:    s_mov_b32 s3, 0x40080000
2847; MOVREL-NEXT:    s_mov_b32 s2, s0
2848; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
2849; MOVREL-NEXT:    s_cmp_eq_u32 s8, 1
2850; MOVREL-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
2851; MOVREL-NEXT:    s_cmp_eq_u32 s8, 2
2852; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
2853; MOVREL-NEXT:    s_cmp_eq_u32 s8, 3
2854; MOVREL-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
2855; MOVREL-NEXT:    s_cmp_eq_u32 s8, 4
2856; MOVREL-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
2857; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
2858; MOVREL-NEXT:    v_mov_b32_e32 v2, s6
2859; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
2860; MOVREL-NEXT:    v_mov_b32_e32 v3, s7
2861; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
2862; MOVREL-NEXT:    s_endpgm
2863;
2864; GFX10-LABEL: dyn_extract_v5f64_s_s:
2865; GFX10:         .amd_kernel_code_t
2866; GFX10-NEXT:     amd_code_version_major = 1
2867; GFX10-NEXT:     amd_code_version_minor = 2
2868; GFX10-NEXT:     amd_machine_kind = 1
2869; GFX10-NEXT:     amd_machine_version_major = 10
2870; GFX10-NEXT:     amd_machine_version_minor = 1
2871; GFX10-NEXT:     amd_machine_version_stepping = 0
2872; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
2873; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
2874; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
2875; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
2876; GFX10-NEXT:     priority = 0
2877; GFX10-NEXT:     float_mode = 240
2878; GFX10-NEXT:     priv = 0
2879; GFX10-NEXT:     enable_dx10_clamp = 1
2880; GFX10-NEXT:     debug_mode = 0
2881; GFX10-NEXT:     enable_ieee_mode = 1
2882; GFX10-NEXT:     enable_wgp_mode = 1
2883; GFX10-NEXT:     enable_mem_ordered = 1
2884; GFX10-NEXT:     enable_fwd_progress = 0
2885; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
2886; GFX10-NEXT:     user_sgpr_count = 6
2887; GFX10-NEXT:     enable_trap_handler = 0
2888; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
2889; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
2890; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
2891; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
2892; GFX10-NEXT:     enable_vgpr_workitem_id = 0
2893; GFX10-NEXT:     enable_exception_msb = 0
2894; GFX10-NEXT:     granulated_lds_size = 0
2895; GFX10-NEXT:     enable_exception = 0
2896; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
2897; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
2898; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
2899; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
2900; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
2901; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
2902; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
2903; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
2904; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
2905; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
2906; GFX10-NEXT:     enable_wavefront_size32 = 1
2907; GFX10-NEXT:     enable_ordered_append_gds = 0
2908; GFX10-NEXT:     private_element_size = 1
2909; GFX10-NEXT:     is_ptr64 = 1
2910; GFX10-NEXT:     is_dynamic_callstack = 0
2911; GFX10-NEXT:     is_debug_enabled = 0
2912; GFX10-NEXT:     is_xnack_enabled = 1
2913; GFX10-NEXT:     workitem_private_segment_byte_size = 0
2914; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
2915; GFX10-NEXT:     gds_segment_byte_size = 0
2916; GFX10-NEXT:     kernarg_segment_byte_size = 28
2917; GFX10-NEXT:     workgroup_fbarrier_count = 0
2918; GFX10-NEXT:     wavefront_sgpr_count = 9
2919; GFX10-NEXT:     workitem_vgpr_count = 3
2920; GFX10-NEXT:     reserved_vgpr_first = 0
2921; GFX10-NEXT:     reserved_vgpr_count = 0
2922; GFX10-NEXT:     reserved_sgpr_first = 0
2923; GFX10-NEXT:     reserved_sgpr_count = 0
2924; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
2925; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
2926; GFX10-NEXT:     kernarg_segment_alignment = 4
2927; GFX10-NEXT:     group_segment_alignment = 4
2928; GFX10-NEXT:     private_segment_alignment = 4
2929; GFX10-NEXT:     wavefront_size = 5
2930; GFX10-NEXT:     call_convention = -1
2931; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
2932; GFX10-NEXT:    .end_amd_kernel_code_t
2933; GFX10-NEXT:  ; %bb.0: ; %entry
2934; GFX10-NEXT:    s_clause 0x1
2935; GFX10-NEXT:    s_load_dword s8, s[4:5], 0x8
2936; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2937; GFX10-NEXT:    s_mov_b32 s2, 0
2938; GFX10-NEXT:    s_mov_b32 s3, 0x40140000
2939; GFX10-NEXT:    s_mov_b32 s5, 0x40080000
2940; GFX10-NEXT:    s_mov_b32 s4, s2
2941; GFX10-NEXT:    v_mov_b32_e32 v2, 0
2942; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2943; GFX10-NEXT:    s_cmp_eq_u32 s8, 1
2944; GFX10-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
2945; GFX10-NEXT:    s_cmp_eq_u32 s8, 2
2946; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
2947; GFX10-NEXT:    s_cmp_eq_u32 s8, 3
2948; GFX10-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
2949; GFX10-NEXT:    s_cmp_eq_u32 s8, 4
2950; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
2951; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2952; GFX10-NEXT:    v_mov_b32_e32 v1, s3
2953; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
2954; GFX10-NEXT:    s_endpgm
2955entry:
2956  %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel
2957  store double %ext, double addrspace(1)* %out
2958  ret void
2959}
2960
; Dynamic extractelement from the constant vector <1.0, 2.0, ..., 15.0> with a
; runtime index %sel passed as an ordinary (non-inreg) i32 argument.
; Every checked target is expected to lower this to a linear chain of
; v_cmp_eq_u32 / v_cndmask_b32 pairs for indices 1 through 14, starting from
; element 0 (1.0) and leaving the result in v0.
; Under the GCN prefix (shared by the gfx900 and fiji RUN lines) the constants
; written as hex literals are first moved into v1..v12; under the GFX10 prefix
; they appear directly as literal operands of v_cndmask_b32_e64.
2961define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
2962; GCN-LABEL: dyn_extract_v15f32_const_s_v:
2963; GCN:       ; %bb.0: ; %entry
2964; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2965; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2966; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
2967; GCN-NEXT:    v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
2968; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2969; GCN-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
2970; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2971; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
2972; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
2973; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2974; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
2975; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2976; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2977; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
2978; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2979; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2980; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
2981; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2982; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
2983; GCN-NEXT:    v_mov_b32_e32 v6, 0x41100000
2984; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2985; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
2986; GCN-NEXT:    v_mov_b32_e32 v7, 0x41200000
2987; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
2988; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
2989; GCN-NEXT:    v_mov_b32_e32 v8, 0x41300000
2990; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2991; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
2992; GCN-NEXT:    v_mov_b32_e32 v9, 0x41400000
2993; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
2994; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
2995; GCN-NEXT:    v_mov_b32_e32 v10, 0x41500000
2996; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2997; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
2998; GCN-NEXT:    v_mov_b32_e32 v11, 0x41600000
2999; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3000; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3001; GCN-NEXT:    v_mov_b32_e32 v12, 0x41700000
3002; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3003; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3004; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v12, vcc
3005; GCN-NEXT:    s_setpc_b64 s[30:31]
3006;
3007; GFX10-LABEL: dyn_extract_v15f32_const_s_v:
3008; GFX10:       ; %bb.0: ; %entry
3009; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3010; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3011; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3012; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3013; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3014; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3015; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3016; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3017; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3018; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3019; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3020; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3021; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3022; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3023; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3024; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3025; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3026; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3027; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3028; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3029; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3030; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3031; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3032; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3033; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3034; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3035; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3036; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3037; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3038; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41700000, vcc_lo
3039; GFX10-NEXT:    s_setpc_b64 s[30:31]
3040entry:
3041  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3042  ret float %ext
3043}
3044
; Dynamic extractelement from the constant vector <1.0, 2.0, ..., 15.0> in an
; amdgpu_ps function whose index %sel is marked inreg.
; Both check prefixes expect the same sequence: the fifteen constants are
; written to s4..s18, the index is copied from s2 into m0, and the element is
; read with one s_movrels_b32 based at s4 before being copied to v0.
3045define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) {
3046; GCN-LABEL: dyn_extract_v15f32_const_s_s:
3047; GCN:       ; %bb.0: ; %entry
3048; GCN-NEXT:    s_mov_b32 s4, 1.0
3049; GCN-NEXT:    s_mov_b32 m0, s2
3050; GCN-NEXT:    s_mov_b32 s18, 0x41700000
3051; GCN-NEXT:    s_mov_b32 s17, 0x41600000
3052; GCN-NEXT:    s_mov_b32 s16, 0x41500000
3053; GCN-NEXT:    s_mov_b32 s15, 0x41400000
3054; GCN-NEXT:    s_mov_b32 s14, 0x41300000
3055; GCN-NEXT:    s_mov_b32 s13, 0x41200000
3056; GCN-NEXT:    s_mov_b32 s12, 0x41100000
3057; GCN-NEXT:    s_mov_b32 s11, 0x41000000
3058; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
3059; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
3060; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
3061; GCN-NEXT:    s_mov_b32 s7, 4.0
3062; GCN-NEXT:    s_mov_b32 s6, 0x40400000
3063; GCN-NEXT:    s_mov_b32 s5, 2.0
3064; GCN-NEXT:    s_movrels_b32 s0, s4
3065; GCN-NEXT:    v_mov_b32_e32 v0, s0
3066; GCN-NEXT:    ; return to shader part epilog
3067;
3068; GFX10-LABEL: dyn_extract_v15f32_const_s_s:
3069; GFX10:       ; %bb.0: ; %entry
3070; GFX10-NEXT:    s_mov_b32 s4, 1.0
3071; GFX10-NEXT:    s_mov_b32 m0, s2
3072; GFX10-NEXT:    s_mov_b32 s18, 0x41700000
3073; GFX10-NEXT:    s_mov_b32 s17, 0x41600000
3074; GFX10-NEXT:    s_mov_b32 s16, 0x41500000
3075; GFX10-NEXT:    s_mov_b32 s15, 0x41400000
3076; GFX10-NEXT:    s_mov_b32 s14, 0x41300000
3077; GFX10-NEXT:    s_mov_b32 s13, 0x41200000
3078; GFX10-NEXT:    s_mov_b32 s12, 0x41100000
3079; GFX10-NEXT:    s_mov_b32 s11, 0x41000000
3080; GFX10-NEXT:    s_mov_b32 s10, 0x40e00000
3081; GFX10-NEXT:    s_mov_b32 s9, 0x40c00000
3082; GFX10-NEXT:    s_mov_b32 s8, 0x40a00000
3083; GFX10-NEXT:    s_mov_b32 s7, 4.0
3084; GFX10-NEXT:    s_mov_b32 s6, 0x40400000
3085; GFX10-NEXT:    s_mov_b32 s5, 2.0
3086; GFX10-NEXT:    s_movrels_b32 s0, s4
3087; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3088; GFX10-NEXT:    ; return to shader part epilog
3089entry:
3090  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3091  ret float %ext
3092}
3093
; Dynamic extractelement from a <15 x float> inreg argument with a non-inreg
; index %sel, in an amdgpu_ps function.
; Per the checks the vector elements are read from s2..s16 and the index from
; v0; the expected lowering is a v_cmp_eq_u32 / v_cndmask_b32 chain over
; indices 1 through 14 with the result in v0.
; Under the GCN prefix the elements are first copied into v1..v15; under the
; GFX10 prefix most elements are used directly as SGPR operands of the selects.
3094define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) {
3095; GCN-LABEL: dyn_extract_v15f32_s_v:
3096; GCN:       ; %bb.0: ; %entry
3097; GCN-NEXT:    s_mov_b32 s0, s2
3098; GCN-NEXT:    s_mov_b32 s1, s3
3099; GCN-NEXT:    s_mov_b32 s2, s4
3100; GCN-NEXT:    v_mov_b32_e32 v1, s0
3101; GCN-NEXT:    v_mov_b32_e32 v2, s1
3102; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3103; GCN-NEXT:    s_mov_b32 s3, s5
3104; GCN-NEXT:    v_mov_b32_e32 v3, s2
3105; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3106; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
3107; GCN-NEXT:    s_mov_b32 s4, s6
3108; GCN-NEXT:    v_mov_b32_e32 v4, s3
3109; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
3110; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
3111; GCN-NEXT:    s_mov_b32 s5, s7
3112; GCN-NEXT:    v_mov_b32_e32 v5, s4
3113; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3114; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
3115; GCN-NEXT:    s_mov_b32 s6, s8
3116; GCN-NEXT:    v_mov_b32_e32 v6, s5
3117; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3118; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
3119; GCN-NEXT:    s_mov_b32 s7, s9
3120; GCN-NEXT:    v_mov_b32_e32 v7, s6
3121; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
3122; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
3123; GCN-NEXT:    s_mov_b32 s8, s10
3124; GCN-NEXT:    v_mov_b32_e32 v8, s7
3125; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3126; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
3127; GCN-NEXT:    s_mov_b32 s9, s11
3128; GCN-NEXT:    v_mov_b32_e32 v9, s8
3129; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
3130; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
3131; GCN-NEXT:    s_mov_b32 s10, s12
3132; GCN-NEXT:    v_mov_b32_e32 v10, s9
3133; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
3134; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
3135; GCN-NEXT:    v_mov_b32_e32 v11, s10
3136; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3137; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
3138; GCN-NEXT:    v_mov_b32_e32 v12, s13
3139; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3140; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
3141; GCN-NEXT:    v_mov_b32_e32 v13, s14
3142; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
3143; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
3144; GCN-NEXT:    v_mov_b32_e32 v14, s15
3145; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
3146; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3147; GCN-NEXT:    v_mov_b32_e32 v15, s16
3148; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
3149; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3150; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
3151; GCN-NEXT:    ; return to shader part epilog
3152;
3153; GFX10-LABEL: dyn_extract_v15f32_s_v:
3154; GFX10:       ; %bb.0: ; %entry
3155; GFX10-NEXT:    s_mov_b32 s1, s3
3156; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3157; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3158; GFX10-NEXT:    s_mov_b32 s0, s2
3159; GFX10-NEXT:    s_mov_b32 s2, s4
3160; GFX10-NEXT:    s_mov_b32 s3, s5
3161; GFX10-NEXT:    s_mov_b32 s4, s6
3162; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
3163; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3164; GFX10-NEXT:    s_mov_b32 s5, s7
3165; GFX10-NEXT:    s_mov_b32 s6, s8
3166; GFX10-NEXT:    s_mov_b32 s7, s9
3167; GFX10-NEXT:    s_mov_b32 s8, s10
3168; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
3169; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3170; GFX10-NEXT:    s_mov_b32 s9, s11
3171; GFX10-NEXT:    s_mov_b32 s10, s12
3172; GFX10-NEXT:    s_mov_b32 s11, s13
3173; GFX10-NEXT:    s_mov_b32 s12, s14
3174; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
3175; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3176; GFX10-NEXT:    s_mov_b32 s13, s15
3177; GFX10-NEXT:    s_mov_b32 s14, s16
3178; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
3179; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3180; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
3181; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3182; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
3183; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3184; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
3185; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3186; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
3187; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3188; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s9, vcc_lo
3189; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3190; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
3191; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3192; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s11, vcc_lo
3193; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3194; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
3195; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3196; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s13, vcc_lo
3197; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3198; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
3199; GFX10-NEXT:    ; return to shader part epilog
3200entry:
3201  %ext = extractelement <15 x float> %vec, i32 %sel
3202  ret float %ext
3203}
3204
; Dynamic extractelement where both the <15 x float> vector and the index are
; ordinary (non-inreg) arguments of a default-calling-convention function.
; Per the checks the elements are in v0..v14 and the index in v15; every
; target is expected to emit a v_cmp_eq_u32 / v_cndmask_b32 chain for indices
; 1 through 14 that accumulates the selected element in v0.
3205define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) {
3206; GCN-LABEL: dyn_extract_v15f32_v_v:
3207; GCN:       ; %bb.0: ; %entry
3208; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3209; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3210; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3211; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3212; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3213; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3214; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3215; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3216; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3217; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3218; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3219; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3220; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3221; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3222; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3223; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3224; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3225; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3226; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3227; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3228; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3229; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3230; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3231; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3232; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3233; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3234; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3235; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3236; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3237; GCN-NEXT:    s_setpc_b64 s[30:31]
3238;
3239; GFX10-LABEL: dyn_extract_v15f32_v_v:
3240; GFX10:       ; %bb.0: ; %entry
3241; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3242; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3243; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3244; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3245; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3246; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3247; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3248; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3249; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3250; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3251; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3252; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3253; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3254; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3255; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3256; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3257; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3258; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3259; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3260; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3261; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3262; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3263; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3264; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3265; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3266; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3267; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3268; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3269; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3270; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3271; GFX10-NEXT:    s_setpc_b64 s[30:31]
3272entry:
3273  %ext = extractelement <15 x float> %vec, i32 %sel
3274  ret float %ext
3275}
3276
; Dynamic extractelement from a non-inreg <15 x float> vector with an inreg
; index %sel (read from s2 in the checks), in an amdgpu_ps function.
; The expected lowering is a single indexed register read, so the gfx900 and
; fiji runs need separate prefixes here:
;   - GPRIDX run: v_mov_b32 bracketed by s_set_gpr_idx_on / s_set_gpr_idx_off.
;   - MOVREL and GFX10 runs: index copied to m0, then v_movrels_b32.
3277define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) {
3278; GPRIDX-LABEL: dyn_extract_v15f32_v_s:
3279; GPRIDX:       ; %bb.0: ; %entry
3280; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
3281; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
3282; GPRIDX-NEXT:    s_set_gpr_idx_off
3283; GPRIDX-NEXT:    ; return to shader part epilog
3284;
3285; MOVREL-LABEL: dyn_extract_v15f32_v_s:
3286; MOVREL:       ; %bb.0: ; %entry
3287; MOVREL-NEXT:    s_mov_b32 m0, s2
3288; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
3289; MOVREL-NEXT:    ; return to shader part epilog
3290;
3291; GFX10-LABEL: dyn_extract_v15f32_v_s:
3292; GFX10:       ; %bb.0: ; %entry
3293; GFX10-NEXT:    s_mov_b32 m0, s2
3294; GFX10-NEXT:    v_movrels_b32_e32 v0, v0
3295; GFX10-NEXT:    ; return to shader part epilog
3296entry:
3297  %ext = extractelement <15 x float> %vec, i32 %sel
3298  ret float %ext
3299}
3300
; Dynamic extractelement where both the <15 x float> vector and the index are
; inreg arguments of an amdgpu_ps function.
; Both check prefixes expect the same sequence: the elements are moved down
; from s2..s16 into s0..s14, the index is copied from s17 into m0, the element
; is read with s_movrels_b32 based at s0, and the result is copied to v0.
3301define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) {
3302; GCN-LABEL: dyn_extract_v15f32_s_s:
3303; GCN:       ; %bb.0: ; %entry
3304; GCN-NEXT:    s_mov_b32 s0, s2
3305; GCN-NEXT:    s_mov_b32 m0, s17
3306; GCN-NEXT:    s_mov_b32 s1, s3
3307; GCN-NEXT:    s_mov_b32 s2, s4
3308; GCN-NEXT:    s_mov_b32 s3, s5
3309; GCN-NEXT:    s_mov_b32 s4, s6
3310; GCN-NEXT:    s_mov_b32 s5, s7
3311; GCN-NEXT:    s_mov_b32 s6, s8
3312; GCN-NEXT:    s_mov_b32 s7, s9
3313; GCN-NEXT:    s_mov_b32 s8, s10
3314; GCN-NEXT:    s_mov_b32 s9, s11
3315; GCN-NEXT:    s_mov_b32 s10, s12
3316; GCN-NEXT:    s_mov_b32 s11, s13
3317; GCN-NEXT:    s_mov_b32 s12, s14
3318; GCN-NEXT:    s_mov_b32 s13, s15
3319; GCN-NEXT:    s_mov_b32 s14, s16
3320; GCN-NEXT:    s_movrels_b32 s0, s0
3321; GCN-NEXT:    v_mov_b32_e32 v0, s0
3322; GCN-NEXT:    ; return to shader part epilog
3323;
3324; GFX10-LABEL: dyn_extract_v15f32_s_s:
3325; GFX10:       ; %bb.0: ; %entry
3326; GFX10-NEXT:    s_mov_b32 s0, s2
3327; GFX10-NEXT:    s_mov_b32 m0, s17
3328; GFX10-NEXT:    s_mov_b32 s1, s3
3329; GFX10-NEXT:    s_mov_b32 s2, s4
3330; GFX10-NEXT:    s_mov_b32 s3, s5
3331; GFX10-NEXT:    s_mov_b32 s4, s6
3332; GFX10-NEXT:    s_mov_b32 s5, s7
3333; GFX10-NEXT:    s_mov_b32 s6, s8
3334; GFX10-NEXT:    s_mov_b32 s7, s9
3335; GFX10-NEXT:    s_mov_b32 s8, s10
3336; GFX10-NEXT:    s_mov_b32 s9, s11
3337; GFX10-NEXT:    s_mov_b32 s10, s12
3338; GFX10-NEXT:    s_mov_b32 s11, s13
3339; GFX10-NEXT:    s_mov_b32 s12, s14
3340; GFX10-NEXT:    s_mov_b32 s13, s15
3341; GFX10-NEXT:    s_mov_b32 s14, s16
3342; GFX10-NEXT:    s_movrels_b32 s0, s0
3343; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3344; GFX10-NEXT:    ; return to shader part epilog
3345entry:
3346  %ext = extractelement <15 x float> %vec, i32 %sel
3347  ret float %ext
3348}
3349
; Dynamic extractelement with inreg vector and inreg index, where the index
; used is %sel + 3.
; Both check prefixes expect the elements to be moved from s2..s16 into
; s0..s14 and the unmodified %sel (s17) to be copied into m0. No add
; instruction is expected: the s_movrels_b32 source is s3 rather than s0, so
; the constant +3 is presumably folded into the base register of the indexed
; read.
3350define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) {
3351; GCN-LABEL: dyn_extract_v15f32_s_s_offset3:
3352; GCN:       ; %bb.0: ; %entry
3353; GCN-NEXT:    s_mov_b32 s0, s2
3354; GCN-NEXT:    s_mov_b32 s1, s3
3355; GCN-NEXT:    s_mov_b32 s3, s5
3356; GCN-NEXT:    s_mov_b32 m0, s17
3357; GCN-NEXT:    s_mov_b32 s2, s4
3358; GCN-NEXT:    s_mov_b32 s4, s6
3359; GCN-NEXT:    s_mov_b32 s5, s7
3360; GCN-NEXT:    s_mov_b32 s6, s8
3361; GCN-NEXT:    s_mov_b32 s7, s9
3362; GCN-NEXT:    s_mov_b32 s8, s10
3363; GCN-NEXT:    s_mov_b32 s9, s11
3364; GCN-NEXT:    s_mov_b32 s10, s12
3365; GCN-NEXT:    s_mov_b32 s11, s13
3366; GCN-NEXT:    s_mov_b32 s12, s14
3367; GCN-NEXT:    s_mov_b32 s13, s15
3368; GCN-NEXT:    s_mov_b32 s14, s16
3369; GCN-NEXT:    s_movrels_b32 s0, s3
3370; GCN-NEXT:    v_mov_b32_e32 v0, s0
3371; GCN-NEXT:    ; return to shader part epilog
3372;
3373; GFX10-LABEL: dyn_extract_v15f32_s_s_offset3:
3374; GFX10:       ; %bb.0: ; %entry
3375; GFX10-NEXT:    s_mov_b32 s1, s3
3376; GFX10-NEXT:    s_mov_b32 s3, s5
3377; GFX10-NEXT:    s_mov_b32 m0, s17
3378; GFX10-NEXT:    s_mov_b32 s0, s2
3379; GFX10-NEXT:    s_mov_b32 s2, s4
3380; GFX10-NEXT:    s_mov_b32 s4, s6
3381; GFX10-NEXT:    s_mov_b32 s5, s7
3382; GFX10-NEXT:    s_mov_b32 s6, s8
3383; GFX10-NEXT:    s_mov_b32 s7, s9
3384; GFX10-NEXT:    s_mov_b32 s8, s10
3385; GFX10-NEXT:    s_mov_b32 s9, s11
3386; GFX10-NEXT:    s_mov_b32 s10, s12
3387; GFX10-NEXT:    s_mov_b32 s11, s13
3388; GFX10-NEXT:    s_mov_b32 s12, s14
3389; GFX10-NEXT:    s_mov_b32 s13, s15
3390; GFX10-NEXT:    s_mov_b32 s14, s16
3391; GFX10-NEXT:    s_movrels_b32 s0, s3
3392; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3393; GFX10-NEXT:    ; return to shader part epilog
3394entry:
3395  %add = add i32 %sel, 3
3396  %ext = extractelement <15 x float> %vec, i32 %add
3397  ret float %ext
3398}
3399
; Dynamic extractelement with non-inreg vector (v0..v14) and non-inreg index
; (v15), where the index used is %sel + 3.
; Here the add is expected to be emitted explicitly on v15, followed by the
; v_cmp_eq_u32 / v_cndmask_b32 chain for indices 1 through 14 into v0.
; The three runs are checked separately because the add differs per target:
;   - GPRIDX run: v_add_u32_e32 v15, 3, v15
;   - MOVREL run: v_add_u32_e32 v15, vcc, 3, v15
;   - GFX10 run:  v_add_nc_u32_e32 v15, 3, v15
3400define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) {
3401; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3:
3402; GPRIDX:       ; %bb.0: ; %entry
3403; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3404; GPRIDX-NEXT:    v_add_u32_e32 v15, 3, v15
3405; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3406; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3407; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3408; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3409; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3410; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3411; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3412; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3413; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3414; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3415; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3416; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3417; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3418; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3419; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3420; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3421; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3422; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3423; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3424; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3425; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3426; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3427; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3428; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3429; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3430; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3431; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3432; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3433; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
3434;
3435; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3:
3436; MOVREL:       ; %bb.0: ; %entry
3437; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3438; MOVREL-NEXT:    v_add_u32_e32 v15, vcc, 3, v15
3439; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3440; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3441; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3442; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3443; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3444; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3445; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3446; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3447; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3448; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3449; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3450; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3451; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3452; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3453; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3454; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3455; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3456; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3457; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3458; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3459; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3460; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3461; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3462; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3463; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3464; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3465; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3466; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3467; MOVREL-NEXT:    s_setpc_b64 s[30:31]
3468;
3469; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3:
3470; GFX10:       ; %bb.0: ; %entry
3471; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3472; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3473; GFX10-NEXT:    v_add_nc_u32_e32 v15, 3, v15
3474; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3475; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3476; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3477; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3478; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3479; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3480; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3481; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3482; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3483; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3484; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3485; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3486; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3487; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3488; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3489; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3490; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3491; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3492; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3493; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3494; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3495; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3496; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3497; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3498; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3499; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3500; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3501; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3502; GFX10-NEXT:    s_setpc_b64 s[30:31]
3503entry:
3504  %add = add i32 %sel, 3
3505  %ext = extractelement <15 x float> %vec, i32 %add
3506  ret float %ext
3507}
3508
3509define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) {
3510; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s:
3511; GPRIDX:         .amd_kernel_code_t
3512; GPRIDX-NEXT:     amd_code_version_major = 1
3513; GPRIDX-NEXT:     amd_code_version_minor = 2
3514; GPRIDX-NEXT:     amd_machine_kind = 1
3515; GPRIDX-NEXT:     amd_machine_version_major = 9
3516; GPRIDX-NEXT:     amd_machine_version_minor = 0
3517; GPRIDX-NEXT:     amd_machine_version_stepping = 0
3518; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
3519; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
3520; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
3521; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 0
3522; GPRIDX-NEXT:     priority = 0
3523; GPRIDX-NEXT:     float_mode = 240
3524; GPRIDX-NEXT:     priv = 0
3525; GPRIDX-NEXT:     enable_dx10_clamp = 1
3526; GPRIDX-NEXT:     debug_mode = 0
3527; GPRIDX-NEXT:     enable_ieee_mode = 1
3528; GPRIDX-NEXT:     enable_wgp_mode = 0
3529; GPRIDX-NEXT:     enable_mem_ordered = 0
3530; GPRIDX-NEXT:     enable_fwd_progress = 0
3531; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3532; GPRIDX-NEXT:     user_sgpr_count = 6
3533; GPRIDX-NEXT:     enable_trap_handler = 0
3534; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
3535; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
3536; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
3537; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
3538; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
3539; GPRIDX-NEXT:     enable_exception_msb = 0
3540; GPRIDX-NEXT:     granulated_lds_size = 0
3541; GPRIDX-NEXT:     enable_exception = 0
3542; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
3543; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
3544; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
3545; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3546; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
3547; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
3548; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
3549; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3550; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3551; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3552; GPRIDX-NEXT:     enable_wavefront_size32 = 0
3553; GPRIDX-NEXT:     enable_ordered_append_gds = 0
3554; GPRIDX-NEXT:     private_element_size = 1
3555; GPRIDX-NEXT:     is_ptr64 = 1
3556; GPRIDX-NEXT:     is_dynamic_callstack = 0
3557; GPRIDX-NEXT:     is_debug_enabled = 0
3558; GPRIDX-NEXT:     is_xnack_enabled = 1
3559; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
3560; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
3561; GPRIDX-NEXT:     gds_segment_byte_size = 0
3562; GPRIDX-NEXT:     kernarg_segment_byte_size = 28
3563; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
3564; GPRIDX-NEXT:     wavefront_sgpr_count = 6
3565; GPRIDX-NEXT:     workitem_vgpr_count = 2
3566; GPRIDX-NEXT:     reserved_vgpr_first = 0
3567; GPRIDX-NEXT:     reserved_vgpr_count = 0
3568; GPRIDX-NEXT:     reserved_sgpr_first = 0
3569; GPRIDX-NEXT:     reserved_sgpr_count = 0
3570; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3571; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
3572; GPRIDX-NEXT:     kernarg_segment_alignment = 4
3573; GPRIDX-NEXT:     group_segment_alignment = 4
3574; GPRIDX-NEXT:     private_segment_alignment = 4
3575; GPRIDX-NEXT:     wavefront_size = 6
3576; GPRIDX-NEXT:     call_convention = -1
3577; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
3578; GPRIDX-NEXT:    .end_amd_kernel_code_t
3579; GPRIDX-NEXT:  ; %bb.0: ; %entry
3580; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3581; GPRIDX-NEXT:    s_load_dword s2, s[4:5], 0x8
3582; GPRIDX-NEXT:    v_mov_b32_e32 v1, 0
3583; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
3584; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
3585; GPRIDX-NEXT:    s_cselect_b32 s3, 2.0, 1.0
3586; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
3587; GPRIDX-NEXT:    s_cselect_b32 s3, 0x40400000, s3
3588; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
3589; GPRIDX-NEXT:    s_cselect_b32 s2, 4.0, s3
3590; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
3591; GPRIDX-NEXT:    global_store_dword v1, v0, s[0:1]
3592; GPRIDX-NEXT:    s_endpgm
3593;
3594; MOVREL-LABEL: dyn_extract_v4f32_s_s_s:
3595; MOVREL:         .amd_kernel_code_t
3596; MOVREL-NEXT:     amd_code_version_major = 1
3597; MOVREL-NEXT:     amd_code_version_minor = 2
3598; MOVREL-NEXT:     amd_machine_kind = 1
3599; MOVREL-NEXT:     amd_machine_version_major = 8
3600; MOVREL-NEXT:     amd_machine_version_minor = 0
3601; MOVREL-NEXT:     amd_machine_version_stepping = 3
3602; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
3603; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
3604; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
3605; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 0
3606; MOVREL-NEXT:     priority = 0
3607; MOVREL-NEXT:     float_mode = 240
3608; MOVREL-NEXT:     priv = 0
3609; MOVREL-NEXT:     enable_dx10_clamp = 1
3610; MOVREL-NEXT:     debug_mode = 0
3611; MOVREL-NEXT:     enable_ieee_mode = 1
3612; MOVREL-NEXT:     enable_wgp_mode = 0
3613; MOVREL-NEXT:     enable_mem_ordered = 0
3614; MOVREL-NEXT:     enable_fwd_progress = 0
3615; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3616; MOVREL-NEXT:     user_sgpr_count = 6
3617; MOVREL-NEXT:     enable_trap_handler = 0
3618; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
3619; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
3620; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
3621; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
3622; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
3623; MOVREL-NEXT:     enable_exception_msb = 0
3624; MOVREL-NEXT:     granulated_lds_size = 0
3625; MOVREL-NEXT:     enable_exception = 0
3626; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
3627; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
3628; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
3629; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3630; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
3631; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
3632; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
3633; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3634; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3635; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3636; MOVREL-NEXT:     enable_wavefront_size32 = 0
3637; MOVREL-NEXT:     enable_ordered_append_gds = 0
3638; MOVREL-NEXT:     private_element_size = 1
3639; MOVREL-NEXT:     is_ptr64 = 1
3640; MOVREL-NEXT:     is_dynamic_callstack = 0
3641; MOVREL-NEXT:     is_debug_enabled = 0
3642; MOVREL-NEXT:     is_xnack_enabled = 0
3643; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
3644; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
3645; MOVREL-NEXT:     gds_segment_byte_size = 0
3646; MOVREL-NEXT:     kernarg_segment_byte_size = 28
3647; MOVREL-NEXT:     workgroup_fbarrier_count = 0
3648; MOVREL-NEXT:     wavefront_sgpr_count = 6
3649; MOVREL-NEXT:     workitem_vgpr_count = 3
3650; MOVREL-NEXT:     reserved_vgpr_first = 0
3651; MOVREL-NEXT:     reserved_vgpr_count = 0
3652; MOVREL-NEXT:     reserved_sgpr_first = 0
3653; MOVREL-NEXT:     reserved_sgpr_count = 0
3654; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3655; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
3656; MOVREL-NEXT:     kernarg_segment_alignment = 4
3657; MOVREL-NEXT:     group_segment_alignment = 4
3658; MOVREL-NEXT:     private_segment_alignment = 4
3659; MOVREL-NEXT:     wavefront_size = 6
3660; MOVREL-NEXT:     call_convention = -1
3661; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
3662; MOVREL-NEXT:    .end_amd_kernel_code_t
3663; MOVREL-NEXT:  ; %bb.0: ; %entry
3664; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3665; MOVREL-NEXT:    s_load_dword s2, s[4:5], 0x8
3666; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
3667; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
3668; MOVREL-NEXT:    s_cmp_eq_u32 s2, 1
3669; MOVREL-NEXT:    s_cselect_b32 s3, 2.0, 1.0
3670; MOVREL-NEXT:    s_cmp_eq_u32 s2, 2
3671; MOVREL-NEXT:    s_cselect_b32 s3, 0x40400000, s3
3672; MOVREL-NEXT:    s_cmp_eq_u32 s2, 3
3673; MOVREL-NEXT:    s_cselect_b32 s2, 4.0, s3
3674; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
3675; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
3676; MOVREL-NEXT:    flat_store_dword v[0:1], v2
3677; MOVREL-NEXT:    s_endpgm
3678;
3679; GFX10-LABEL: dyn_extract_v4f32_s_s_s:
3680; GFX10:         .amd_kernel_code_t
3681; GFX10-NEXT:     amd_code_version_major = 1
3682; GFX10-NEXT:     amd_code_version_minor = 2
3683; GFX10-NEXT:     amd_machine_kind = 1
3684; GFX10-NEXT:     amd_machine_version_major = 10
3685; GFX10-NEXT:     amd_machine_version_minor = 1
3686; GFX10-NEXT:     amd_machine_version_stepping = 0
3687; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
3688; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
3689; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
3690; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
3691; GFX10-NEXT:     priority = 0
3692; GFX10-NEXT:     float_mode = 240
3693; GFX10-NEXT:     priv = 0
3694; GFX10-NEXT:     enable_dx10_clamp = 1
3695; GFX10-NEXT:     debug_mode = 0
3696; GFX10-NEXT:     enable_ieee_mode = 1
3697; GFX10-NEXT:     enable_wgp_mode = 1
3698; GFX10-NEXT:     enable_mem_ordered = 1
3699; GFX10-NEXT:     enable_fwd_progress = 0
3700; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3701; GFX10-NEXT:     user_sgpr_count = 6
3702; GFX10-NEXT:     enable_trap_handler = 0
3703; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
3704; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
3705; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
3706; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
3707; GFX10-NEXT:     enable_vgpr_workitem_id = 0
3708; GFX10-NEXT:     enable_exception_msb = 0
3709; GFX10-NEXT:     granulated_lds_size = 0
3710; GFX10-NEXT:     enable_exception = 0
3711; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
3712; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
3713; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
3714; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3715; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
3716; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
3717; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
3718; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3719; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3720; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3721; GFX10-NEXT:     enable_wavefront_size32 = 1
3722; GFX10-NEXT:     enable_ordered_append_gds = 0
3723; GFX10-NEXT:     private_element_size = 1
3724; GFX10-NEXT:     is_ptr64 = 1
3725; GFX10-NEXT:     is_dynamic_callstack = 0
3726; GFX10-NEXT:     is_debug_enabled = 0
3727; GFX10-NEXT:     is_xnack_enabled = 1
3728; GFX10-NEXT:     workitem_private_segment_byte_size = 0
3729; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
3730; GFX10-NEXT:     gds_segment_byte_size = 0
3731; GFX10-NEXT:     kernarg_segment_byte_size = 28
3732; GFX10-NEXT:     workgroup_fbarrier_count = 0
3733; GFX10-NEXT:     wavefront_sgpr_count = 6
3734; GFX10-NEXT:     workitem_vgpr_count = 2
3735; GFX10-NEXT:     reserved_vgpr_first = 0
3736; GFX10-NEXT:     reserved_vgpr_count = 0
3737; GFX10-NEXT:     reserved_sgpr_first = 0
3738; GFX10-NEXT:     reserved_sgpr_count = 0
3739; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3740; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
3741; GFX10-NEXT:     kernarg_segment_alignment = 4
3742; GFX10-NEXT:     group_segment_alignment = 4
3743; GFX10-NEXT:     private_segment_alignment = 4
3744; GFX10-NEXT:     wavefront_size = 5
3745; GFX10-NEXT:     call_convention = -1
3746; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
3747; GFX10-NEXT:    .end_amd_kernel_code_t
3748; GFX10-NEXT:  ; %bb.0: ; %entry
3749; GFX10-NEXT:    s_clause 0x1
3750; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8
3751; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3752; GFX10-NEXT:    v_mov_b32_e32 v1, 0
3753; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3754; GFX10-NEXT:    s_cmp_eq_u32 s2, 1
3755; GFX10-NEXT:    s_cselect_b32 s3, 2.0, 1.0
3756; GFX10-NEXT:    s_cmp_eq_u32 s2, 2
3757; GFX10-NEXT:    s_cselect_b32 s3, 0x40400000, s3
3758; GFX10-NEXT:    s_cmp_eq_u32 s2, 3
3759; GFX10-NEXT:    s_cselect_b32 s2, 4.0, s3
3760; GFX10-NEXT:    v_mov_b32_e32 v0, s2
3761; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
3762; GFX10-NEXT:    s_endpgm
3763entry:
3764  %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel
3765  store float %ext, float addrspace(1)* %out
3766  ret void
3767}
3768
; Kernel test: extractelement from the constant vector
; <4 x double> <1.0, 2.0, 3.0, 4.0>, indexed by the i32 kernel argument %sel;
; the selected double is stored through %out.
;
; All three check groups (gfx900, fiji and gfx1010 runs) expect the index to be
; resolved by a chain of s_cmp_eq_u32 / s_cselect_b64 pairs comparing %sel with
; 1, 2 and 3. The value 3.0 is built in s[2:3] (high dword 0x40080000, low
; dword 0), while 1.0, 2.0 and 4.0 appear directly as s_cselect_b64 operands.
; The fiji run stores with flat_store_dwordx2 through a VGPR address pair; the
; other two runs use global_store_dwordx2 with the SGPR base s[0:1].
;
; The check lines are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
3769define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) {
3770; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s:
3771; GPRIDX:         .amd_kernel_code_t
3772; GPRIDX-NEXT:     amd_code_version_major = 1
3773; GPRIDX-NEXT:     amd_code_version_minor = 2
3774; GPRIDX-NEXT:     amd_machine_kind = 1
3775; GPRIDX-NEXT:     amd_machine_version_major = 9
3776; GPRIDX-NEXT:     amd_machine_version_minor = 0
3777; GPRIDX-NEXT:     amd_machine_version_stepping = 0
3778; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
3779; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
3780; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
3781; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 0
3782; GPRIDX-NEXT:     priority = 0
3783; GPRIDX-NEXT:     float_mode = 240
3784; GPRIDX-NEXT:     priv = 0
3785; GPRIDX-NEXT:     enable_dx10_clamp = 1
3786; GPRIDX-NEXT:     debug_mode = 0
3787; GPRIDX-NEXT:     enable_ieee_mode = 1
3788; GPRIDX-NEXT:     enable_wgp_mode = 0
3789; GPRIDX-NEXT:     enable_mem_ordered = 0
3790; GPRIDX-NEXT:     enable_fwd_progress = 0
3791; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3792; GPRIDX-NEXT:     user_sgpr_count = 6
3793; GPRIDX-NEXT:     enable_trap_handler = 0
3794; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
3795; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
3796; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
3797; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
3798; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
3799; GPRIDX-NEXT:     enable_exception_msb = 0
3800; GPRIDX-NEXT:     granulated_lds_size = 0
3801; GPRIDX-NEXT:     enable_exception = 0
3802; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
3803; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
3804; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
3805; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3806; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
3807; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
3808; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
3809; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3810; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3811; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3812; GPRIDX-NEXT:     enable_wavefront_size32 = 0
3813; GPRIDX-NEXT:     enable_ordered_append_gds = 0
3814; GPRIDX-NEXT:     private_element_size = 1
3815; GPRIDX-NEXT:     is_ptr64 = 1
3816; GPRIDX-NEXT:     is_dynamic_callstack = 0
3817; GPRIDX-NEXT:     is_debug_enabled = 0
3818; GPRIDX-NEXT:     is_xnack_enabled = 1
3819; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
3820; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
3821; GPRIDX-NEXT:     gds_segment_byte_size = 0
3822; GPRIDX-NEXT:     kernarg_segment_byte_size = 28
3823; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
3824; GPRIDX-NEXT:     wavefront_sgpr_count = 7
3825; GPRIDX-NEXT:     workitem_vgpr_count = 3
3826; GPRIDX-NEXT:     reserved_vgpr_first = 0
3827; GPRIDX-NEXT:     reserved_vgpr_count = 0
3828; GPRIDX-NEXT:     reserved_sgpr_first = 0
3829; GPRIDX-NEXT:     reserved_sgpr_count = 0
3830; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3831; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
3832; GPRIDX-NEXT:     kernarg_segment_alignment = 4
3833; GPRIDX-NEXT:     group_segment_alignment = 4
3834; GPRIDX-NEXT:     private_segment_alignment = 4
3835; GPRIDX-NEXT:     wavefront_size = 6
3836; GPRIDX-NEXT:     call_convention = -1
3837; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
3838; GPRIDX-NEXT:    .end_amd_kernel_code_t
3839; GPRIDX-NEXT:  ; %bb.0: ; %entry
3840; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3841; GPRIDX-NEXT:    s_load_dword s6, s[4:5], 0x8
3842; GPRIDX-NEXT:    s_mov_b32 s2, 0
3843; GPRIDX-NEXT:    s_mov_b32 s3, 0x40080000
3844; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
3845; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
3846; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 1
3847; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
3848; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 2
3849; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3850; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 3
3851; GPRIDX-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
3852; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
3853; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
3854; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
3855; GPRIDX-NEXT:    s_endpgm
3856;
3857; MOVREL-LABEL: dyn_extract_v4f64_s_s_s:
3858; MOVREL:         .amd_kernel_code_t
3859; MOVREL-NEXT:     amd_code_version_major = 1
3860; MOVREL-NEXT:     amd_code_version_minor = 2
3861; MOVREL-NEXT:     amd_machine_kind = 1
3862; MOVREL-NEXT:     amd_machine_version_major = 8
3863; MOVREL-NEXT:     amd_machine_version_minor = 0
3864; MOVREL-NEXT:     amd_machine_version_stepping = 3
3865; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
3866; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
3867; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
3868; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 0
3869; MOVREL-NEXT:     priority = 0
3870; MOVREL-NEXT:     float_mode = 240
3871; MOVREL-NEXT:     priv = 0
3872; MOVREL-NEXT:     enable_dx10_clamp = 1
3873; MOVREL-NEXT:     debug_mode = 0
3874; MOVREL-NEXT:     enable_ieee_mode = 1
3875; MOVREL-NEXT:     enable_wgp_mode = 0
3876; MOVREL-NEXT:     enable_mem_ordered = 0
3877; MOVREL-NEXT:     enable_fwd_progress = 0
3878; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3879; MOVREL-NEXT:     user_sgpr_count = 6
3880; MOVREL-NEXT:     enable_trap_handler = 0
3881; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
3882; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
3883; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
3884; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
3885; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
3886; MOVREL-NEXT:     enable_exception_msb = 0
3887; MOVREL-NEXT:     granulated_lds_size = 0
3888; MOVREL-NEXT:     enable_exception = 0
3889; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
3890; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
3891; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
3892; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3893; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
3894; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
3895; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
3896; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3897; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3898; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3899; MOVREL-NEXT:     enable_wavefront_size32 = 0
3900; MOVREL-NEXT:     enable_ordered_append_gds = 0
3901; MOVREL-NEXT:     private_element_size = 1
3902; MOVREL-NEXT:     is_ptr64 = 1
3903; MOVREL-NEXT:     is_dynamic_callstack = 0
3904; MOVREL-NEXT:     is_debug_enabled = 0
3905; MOVREL-NEXT:     is_xnack_enabled = 0
3906; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
3907; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
3908; MOVREL-NEXT:     gds_segment_byte_size = 0
3909; MOVREL-NEXT:     kernarg_segment_byte_size = 28
3910; MOVREL-NEXT:     workgroup_fbarrier_count = 0
3911; MOVREL-NEXT:     wavefront_sgpr_count = 7
3912; MOVREL-NEXT:     workitem_vgpr_count = 4
3913; MOVREL-NEXT:     reserved_vgpr_first = 0
3914; MOVREL-NEXT:     reserved_vgpr_count = 0
3915; MOVREL-NEXT:     reserved_sgpr_first = 0
3916; MOVREL-NEXT:     reserved_sgpr_count = 0
3917; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3918; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
3919; MOVREL-NEXT:     kernarg_segment_alignment = 4
3920; MOVREL-NEXT:     group_segment_alignment = 4
3921; MOVREL-NEXT:     private_segment_alignment = 4
3922; MOVREL-NEXT:     wavefront_size = 6
3923; MOVREL-NEXT:     call_convention = -1
3924; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
3925; MOVREL-NEXT:    .end_amd_kernel_code_t
3926; MOVREL-NEXT:  ; %bb.0: ; %entry
3927; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3928; MOVREL-NEXT:    s_load_dword s6, s[4:5], 0x8
3929; MOVREL-NEXT:    s_mov_b32 s2, 0
3930; MOVREL-NEXT:    s_mov_b32 s3, 0x40080000
3931; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
3932; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
3933; MOVREL-NEXT:    s_cmp_eq_u32 s6, 1
3934; MOVREL-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
3935; MOVREL-NEXT:    s_cmp_eq_u32 s6, 2
3936; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3937; MOVREL-NEXT:    s_cmp_eq_u32 s6, 3
3938; MOVREL-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
3939; MOVREL-NEXT:    v_mov_b32_e32 v0, s2
3940; MOVREL-NEXT:    v_mov_b32_e32 v1, s3
3941; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
3942; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3943; MOVREL-NEXT:    s_endpgm
3944;
3945; GFX10-LABEL: dyn_extract_v4f64_s_s_s:
3946; GFX10:         .amd_kernel_code_t
3947; GFX10-NEXT:     amd_code_version_major = 1
3948; GFX10-NEXT:     amd_code_version_minor = 2
3949; GFX10-NEXT:     amd_machine_kind = 1
3950; GFX10-NEXT:     amd_machine_version_major = 10
3951; GFX10-NEXT:     amd_machine_version_minor = 1
3952; GFX10-NEXT:     amd_machine_version_stepping = 0
3953; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
3954; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
3955; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
3956; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
3957; GFX10-NEXT:     priority = 0
3958; GFX10-NEXT:     float_mode = 240
3959; GFX10-NEXT:     priv = 0
3960; GFX10-NEXT:     enable_dx10_clamp = 1
3961; GFX10-NEXT:     debug_mode = 0
3962; GFX10-NEXT:     enable_ieee_mode = 1
3963; GFX10-NEXT:     enable_wgp_mode = 1
3964; GFX10-NEXT:     enable_mem_ordered = 1
3965; GFX10-NEXT:     enable_fwd_progress = 0
3966; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3967; GFX10-NEXT:     user_sgpr_count = 6
3968; GFX10-NEXT:     enable_trap_handler = 0
3969; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
3970; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
3971; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
3972; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
3973; GFX10-NEXT:     enable_vgpr_workitem_id = 0
3974; GFX10-NEXT:     enable_exception_msb = 0
3975; GFX10-NEXT:     granulated_lds_size = 0
3976; GFX10-NEXT:     enable_exception = 0
3977; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
3978; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
3979; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
3980; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3981; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
3982; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
3983; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
3984; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3985; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3986; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3987; GFX10-NEXT:     enable_wavefront_size32 = 1
3988; GFX10-NEXT:     enable_ordered_append_gds = 0
3989; GFX10-NEXT:     private_element_size = 1
3990; GFX10-NEXT:     is_ptr64 = 1
3991; GFX10-NEXT:     is_dynamic_callstack = 0
3992; GFX10-NEXT:     is_debug_enabled = 0
3993; GFX10-NEXT:     is_xnack_enabled = 1
3994; GFX10-NEXT:     workitem_private_segment_byte_size = 0
3995; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
3996; GFX10-NEXT:     gds_segment_byte_size = 0
3997; GFX10-NEXT:     kernarg_segment_byte_size = 28
3998; GFX10-NEXT:     workgroup_fbarrier_count = 0
3999; GFX10-NEXT:     wavefront_sgpr_count = 7
4000; GFX10-NEXT:     workitem_vgpr_count = 3
4001; GFX10-NEXT:     reserved_vgpr_first = 0
4002; GFX10-NEXT:     reserved_vgpr_count = 0
4003; GFX10-NEXT:     reserved_sgpr_first = 0
4004; GFX10-NEXT:     reserved_sgpr_count = 0
4005; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4006; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
4007; GFX10-NEXT:     kernarg_segment_alignment = 4
4008; GFX10-NEXT:     group_segment_alignment = 4
4009; GFX10-NEXT:     private_segment_alignment = 4
4010; GFX10-NEXT:     wavefront_size = 5
4011; GFX10-NEXT:     call_convention = -1
4012; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
4013; GFX10-NEXT:    .end_amd_kernel_code_t
4014; GFX10-NEXT:  ; %bb.0: ; %entry
4015; GFX10-NEXT:    s_clause 0x1
4016; GFX10-NEXT:    s_load_dword s6, s[4:5], 0x8
4017; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4018; GFX10-NEXT:    s_mov_b32 s2, 0
4019; GFX10-NEXT:    s_mov_b32 s3, 0x40080000
4020; GFX10-NEXT:    v_mov_b32_e32 v2, 0
4021; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
4022; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
4023; GFX10-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
4024; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
4025; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
4026; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
4027; GFX10-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
4028; GFX10-NEXT:    v_mov_b32_e32 v0, s2
4029; GFX10-NEXT:    v_mov_b32_e32 v1, s3
4030; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
4031; GFX10-NEXT:    s_endpgm
4032entry:
; The vector operand is a compile-time constant; only the index %sel varies.
4033  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
4034  store double %ext, double addrspace(1)* %out
4035  ret void
4036}
4037
; Loads a <64 x i32> from the global pointer %ptr and returns constant
; element 7.
;
; The checks expect a single dwordx4 load instead of a load of the whole
; vector: element 7 sits at byte offset 28, so the load at offset 16 covers
; elements 4-7 in v[4:7] and the last register (v7) is copied into v0.
; The gfx900 and gfx1010 runs fold the offset into global_load_dwordx4; the
; fiji run adds 16 to the 64-bit address with v_add_u32 / v_addc_u32 and then
; uses flat_load_dwordx4.
4038define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) {
4039; GPRIDX-LABEL: v_extract_v64i32_7:
4040; GPRIDX:       ; %bb.0:
4041; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4042; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
4043; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4044; GPRIDX-NEXT:    v_mov_b32_e32 v0, v7
4045; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4046;
4047; MOVREL-LABEL: v_extract_v64i32_7:
4048; MOVREL:       ; %bb.0:
4049; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4050; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
4051; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4052; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
4053; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4054; MOVREL-NEXT:    v_mov_b32_e32 v0, v7
4055; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4056;
4057; GFX10-LABEL: v_extract_v64i32_7:
4058; GFX10:       ; %bb.0:
4059; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4060; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4061; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
4062; GFX10-NEXT:    s_waitcnt vmcnt(0)
4063; GFX10-NEXT:    v_mov_b32_e32 v0, v7
4064; GFX10-NEXT:    s_setpc_b64 s[30:31]
4065  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4066  %elt = extractelement <64 x i32> %vec, i32 7
4067  ret i32 %elt
4068}
4069
; Loads a <64 x i32> from the global pointer %ptr and returns constant
; element 32.
;
; Element 32 sits at byte offset 128, which is where the expected dwordx4
; load starts. The load writes v[0:3], so the wanted element is already in v0
; and no register copy follows the wait.
; The gfx900 and gfx1010 runs fold the offset into global_load_dwordx4. The
; fiji run materializes 0x80 in s[4:5], copies it to v2/v3, and adds it to the
; address with v_add_u32 / v_addc_u32 before flat_load_dwordx4.
4070define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) {
4071; GPRIDX-LABEL: v_extract_v64i32_32:
4072; GPRIDX:       ; %bb.0:
4073; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4074; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4075; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4076; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4077;
4078; MOVREL-LABEL: v_extract_v64i32_32:
4079; MOVREL:       ; %bb.0:
4080; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4081; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
4082; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
4083; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
4084; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
4085; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
4086; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4087; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4088; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4089;
4090; GFX10-LABEL: v_extract_v64i32_32:
4091; GFX10:       ; %bb.0:
4092; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4093; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4094; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4095; GFX10-NEXT:    s_waitcnt vmcnt(0)
4096; GFX10-NEXT:    s_setpc_b64 s[30:31]
4097  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4098  %elt = extractelement <64 x i32> %vec, i32 32
4099  ret i32 %elt
4100}
4101
; Loads a <64 x i32> from the global pointer %ptr and returns constant
; element 33.
;
; The expected code uses the same dwordx4 load at byte offset 128 as the
; element-32 test (elements 32-35 land in v[0:3]); element 33 is then in v1
; and is copied into v0.
; The gfx900 and gfx1010 runs fold the offset into global_load_dwordx4. The
; fiji run materializes 0x80 in s[4:5], copies it to v2/v3, and adds it to the
; address with v_add_u32 / v_addc_u32 before flat_load_dwordx4.
4102define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) {
4103; GPRIDX-LABEL: v_extract_v64i32_33:
4104; GPRIDX:       ; %bb.0:
4105; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4106; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4107; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4108; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
4109; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4110;
4111; MOVREL-LABEL: v_extract_v64i32_33:
4112; MOVREL:       ; %bb.0:
4113; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4114; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
4115; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
4116; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
4117; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
4118; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
4119; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4120; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4121; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
4122; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4123;
4124; GFX10-LABEL: v_extract_v64i32_33:
4125; GFX10:       ; %bb.0:
4126; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4127; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4128; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4129; GFX10-NEXT:    s_waitcnt vmcnt(0)
4130; GFX10-NEXT:    v_mov_b32_e32 v0, v1
4131; GFX10-NEXT:    s_setpc_b64 s[30:31]
4132  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4133  %elt = extractelement <64 x i32> %vec, i32 33
4134  ret i32 %elt
4135}
4136
; Loads a <64 x i32> from the global pointer %ptr and returns constant
; element 37.
;
; Element 37 sits at byte offset 148, so the expected dwordx4 load at offset
; 144 (0x90) covers elements 36-39 in v[4:7]; the second register (v5) is
; copied into v0.
; The gfx900 and gfx1010 runs fold the offset into global_load_dwordx4; the
; fiji run adds 0x90 to the 64-bit address with v_add_u32 / v_addc_u32 and
; then uses flat_load_dwordx4.
4137define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) {
4138; GPRIDX-LABEL: v_extract_v64i32_37:
4139; GPRIDX:       ; %bb.0:
4140; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4141; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
4142; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4143; GPRIDX-NEXT:    v_mov_b32_e32 v0, v5
4144; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4145;
4146; MOVREL-LABEL: v_extract_v64i32_37:
4147; MOVREL:       ; %bb.0:
4148; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4149; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x90, v0
4150; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4151; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
4152; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4153; MOVREL-NEXT:    v_mov_b32_e32 v0, v5
4154; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4155;
4156; GFX10-LABEL: v_extract_v64i32_37:
4157; GFX10:       ; %bb.0:
4158; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4159; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4160; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
4161; GFX10-NEXT:    s_waitcnt vmcnt(0)
4162; GFX10-NEXT:    v_mov_b32_e32 v0, v5
4163; GFX10-NEXT:    s_setpc_b64 s[30:31]
4164  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4165  %elt = extractelement <64 x i32> %vec, i32 37
4166  ret i32 %elt
4167}
4168