1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
3
4; Check lowering of large insertelement operations that use the stack
5; instead of register indexing.
6
; Kernel under test: loads a <64 x i32> from %ptr, inserts %val at the
; run-time index %idx, and stores the resulting vector to %out.ptr.
;
; The expected-output lines inside the function are autogenerated (see the
; note on the first line of this file); regenerate them with the update script
; rather than editing them by hand. As recorded there, the expected code:
;   - fetches the vector with four s_load_dwordx16 and writes all 64 dwords
;     to scratch with buffer_store_dword, starting at offset 256 (0x100);
;   - masks a scalar with 63, shifts it left by 2 (s_and_b32 / s_lshl_b32),
;     adds it to the 0x100 base held in v16 and stores one more dword there
;     (presumably %idx scaled to a byte offset and %val as the data; the
;     register-to-argument mapping is inferred from the kernarg load offsets
;     and should be confirmed);
;   - reloads all 64 dwords with buffer_load_dword and writes them out with
;     sixteen global_store_dwordx4.
7define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.ptr, <64 x i32> addrspace(1)* %ptr, i32 %val, i32 %idx) #0 {
8; GCN-LABEL: v_insert_v64i32_varidx:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    s_add_u32 s0, s0, s7
11; GCN-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
12; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
13; GCN-NEXT:    v_mov_b32_e32 v16, 0x100
14; GCN-NEXT:    s_addc_u32 s1, s1, 0
15; GCN-NEXT:    v_add_u32_e32 v31, 64, v16
16; GCN-NEXT:    s_waitcnt lgkmcnt(0)
17; GCN-NEXT:    s_load_dwordx16 s[12:27], s[10:11], 0x0
18; GCN-NEXT:    s_load_dwordx16 s[52:67], s[10:11], 0x40
19; GCN-NEXT:    s_load_dwordx16 s[36:51], s[10:11], 0x80
20; GCN-NEXT:    v_add_u32_e32 v32, 0x44, v16
21; GCN-NEXT:    v_add_u32_e32 v33, 0x48, v16
22; GCN-NEXT:    s_waitcnt lgkmcnt(0)
23; GCN-NEXT:    v_mov_b32_e32 v0, s12
24; GCN-NEXT:    v_mov_b32_e32 v1, s13
25; GCN-NEXT:    v_mov_b32_e32 v2, s14
26; GCN-NEXT:    v_mov_b32_e32 v3, s15
27; GCN-NEXT:    v_mov_b32_e32 v4, s16
28; GCN-NEXT:    v_mov_b32_e32 v5, s17
29; GCN-NEXT:    v_mov_b32_e32 v6, s18
30; GCN-NEXT:    v_mov_b32_e32 v7, s19
31; GCN-NEXT:    v_mov_b32_e32 v8, s20
32; GCN-NEXT:    v_mov_b32_e32 v9, s21
33; GCN-NEXT:    v_mov_b32_e32 v10, s22
34; GCN-NEXT:    v_mov_b32_e32 v11, s23
35; GCN-NEXT:    v_mov_b32_e32 v12, s24
36; GCN-NEXT:    v_mov_b32_e32 v13, s25
37; GCN-NEXT:    v_mov_b32_e32 v14, s26
38; GCN-NEXT:    v_mov_b32_e32 v15, s27
39; GCN-NEXT:    s_load_dwordx16 s[12:27], s[10:11], 0xc0
40; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:256
41; GCN-NEXT:    v_add_u32_e32 v0, 4, v16
42; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
43; GCN-NEXT:    v_mov_b32_e32 v1, s52
44; GCN-NEXT:    buffer_store_dword v1, v31, s[0:3], 0 offen
45; GCN-NEXT:    v_mov_b32_e32 v1, s53
46; GCN-NEXT:    buffer_store_dword v1, v32, s[0:3], 0 offen
47; GCN-NEXT:    v_mov_b32_e32 v1, s54
48; GCN-NEXT:    buffer_store_dword v1, v33, s[0:3], 0 offen
49; GCN-NEXT:    s_movk_i32 s4, 0x50
50; GCN-NEXT:    v_add_u32_e32 v34, 0x4c, v16
51; GCN-NEXT:    v_mov_b32_e32 v1, s55
52; GCN-NEXT:    buffer_store_dword v1, v34, s[0:3], 0 offen
53; GCN-NEXT:    v_add_u32_e32 v35, s4, v16
54; GCN-NEXT:    v_mov_b32_e32 v1, s56
55; GCN-NEXT:    buffer_store_dword v1, v35, s[0:3], 0 offen
56; GCN-NEXT:    v_add_u32_e32 v36, 0x54, v16
57; GCN-NEXT:    v_mov_b32_e32 v1, s57
58; GCN-NEXT:    buffer_store_dword v1, v36, s[0:3], 0 offen
59; GCN-NEXT:    v_add_u32_e32 v37, 0x58, v16
60; GCN-NEXT:    v_mov_b32_e32 v1, s58
61; GCN-NEXT:    buffer_store_dword v1, v37, s[0:3], 0 offen
62; GCN-NEXT:    s_movk_i32 s5, 0x60
63; GCN-NEXT:    v_add_u32_e32 v38, 0x5c, v16
64; GCN-NEXT:    v_mov_b32_e32 v1, s59
65; GCN-NEXT:    buffer_store_dword v1, v38, s[0:3], 0 offen
66; GCN-NEXT:    v_add_u32_e32 v39, s5, v16
67; GCN-NEXT:    v_mov_b32_e32 v1, s60
68; GCN-NEXT:    buffer_store_dword v1, v39, s[0:3], 0 offen
69; GCN-NEXT:    v_add_u32_e32 v40, 0x64, v16
70; GCN-NEXT:    v_mov_b32_e32 v1, s61
71; GCN-NEXT:    buffer_store_dword v1, v40, s[0:3], 0 offen
72; GCN-NEXT:    v_add_u32_e32 v41, 0x68, v16
73; GCN-NEXT:    v_mov_b32_e32 v1, s62
74; GCN-NEXT:    buffer_store_dword v1, v41, s[0:3], 0 offen
75; GCN-NEXT:    s_movk_i32 s10, 0x70
76; GCN-NEXT:    v_add_u32_e32 v42, 0x6c, v16
77; GCN-NEXT:    v_mov_b32_e32 v1, s63
78; GCN-NEXT:    buffer_store_dword v1, v42, s[0:3], 0 offen
79; GCN-NEXT:    v_add_u32_e32 v43, s10, v16
80; GCN-NEXT:    v_mov_b32_e32 v1, s64
81; GCN-NEXT:    buffer_store_dword v1, v43, s[0:3], 0 offen
82; GCN-NEXT:    v_add_u32_e32 v44, 0x74, v16
83; GCN-NEXT:    v_mov_b32_e32 v1, s65
84; GCN-NEXT:    buffer_store_dword v1, v44, s[0:3], 0 offen
85; GCN-NEXT:    v_add_u32_e32 v45, 0x78, v16
86; GCN-NEXT:    v_mov_b32_e32 v1, s66
87; GCN-NEXT:    buffer_store_dword v1, v45, s[0:3], 0 offen
88; GCN-NEXT:    v_add_u32_e32 v46, 0x7c, v16
89; GCN-NEXT:    v_mov_b32_e32 v1, s67
90; GCN-NEXT:    buffer_store_dword v1, v46, s[0:3], 0 offen
91; GCN-NEXT:    v_add_u32_e32 v47, 0x80, v16
92; GCN-NEXT:    v_mov_b32_e32 v1, s36
93; GCN-NEXT:    buffer_store_dword v1, v47, s[0:3], 0 offen
94; GCN-NEXT:    v_add_u32_e32 v48, 0x84, v16
95; GCN-NEXT:    v_mov_b32_e32 v1, s37
96; GCN-NEXT:    buffer_store_dword v1, v48, s[0:3], 0 offen
97; GCN-NEXT:    v_add_u32_e32 v49, 0x88, v16
98; GCN-NEXT:    v_mov_b32_e32 v1, s38
99; GCN-NEXT:    buffer_store_dword v1, v49, s[0:3], 0 offen
100; GCN-NEXT:    s_movk_i32 s11, 0x90
101; GCN-NEXT:    v_add_u32_e32 v50, 0x8c, v16
102; GCN-NEXT:    v_mov_b32_e32 v1, s39
103; GCN-NEXT:    buffer_store_dword v1, v50, s[0:3], 0 offen
104; GCN-NEXT:    v_add_u32_e32 v51, s11, v16
105; GCN-NEXT:    v_mov_b32_e32 v1, s40
106; GCN-NEXT:    buffer_store_dword v1, v51, s[0:3], 0 offen
107; GCN-NEXT:    v_add_u32_e32 v52, 0x94, v16
108; GCN-NEXT:    v_mov_b32_e32 v1, s41
109; GCN-NEXT:    buffer_store_dword v1, v52, s[0:3], 0 offen
110; GCN-NEXT:    v_add_u32_e32 v53, 0x98, v16
111; GCN-NEXT:    v_mov_b32_e32 v1, s42
112; GCN-NEXT:    buffer_store_dword v1, v53, s[0:3], 0 offen
113; GCN-NEXT:    s_movk_i32 s28, 0xa0
114; GCN-NEXT:    v_add_u32_e32 v54, 0x9c, v16
115; GCN-NEXT:    v_mov_b32_e32 v1, s43
116; GCN-NEXT:    buffer_store_dword v1, v54, s[0:3], 0 offen
117; GCN-NEXT:    v_add_u32_e32 v55, s28, v16
118; GCN-NEXT:    v_mov_b32_e32 v1, s44
119; GCN-NEXT:    buffer_store_dword v1, v55, s[0:3], 0 offen
120; GCN-NEXT:    v_add_u32_e32 v56, 0xa4, v16
121; GCN-NEXT:    v_mov_b32_e32 v1, s45
122; GCN-NEXT:    buffer_store_dword v1, v56, s[0:3], 0 offen
123; GCN-NEXT:    v_add_u32_e32 v57, 0xa8, v16
124; GCN-NEXT:    v_mov_b32_e32 v1, s46
125; GCN-NEXT:    buffer_store_dword v1, v57, s[0:3], 0 offen
126; GCN-NEXT:    s_movk_i32 s29, 0xb0
127; GCN-NEXT:    v_add_u32_e32 v58, 0xac, v16
128; GCN-NEXT:    v_mov_b32_e32 v1, s47
129; GCN-NEXT:    buffer_store_dword v1, v58, s[0:3], 0 offen
130; GCN-NEXT:    v_add_u32_e32 v59, s29, v16
131; GCN-NEXT:    v_mov_b32_e32 v1, s48
132; GCN-NEXT:    buffer_store_dword v1, v59, s[0:3], 0 offen
133; GCN-NEXT:    v_add_u32_e32 v60, 0xb4, v16
134; GCN-NEXT:    v_mov_b32_e32 v1, s49
135; GCN-NEXT:    buffer_store_dword v1, v60, s[0:3], 0 offen
136; GCN-NEXT:    v_add_u32_e32 v61, 0xb8, v16
137; GCN-NEXT:    v_mov_b32_e32 v1, s50
138; GCN-NEXT:    buffer_store_dword v1, v61, s[0:3], 0 offen
139; GCN-NEXT:    v_add_u32_e32 v62, 0xbc, v16
140; GCN-NEXT:    v_mov_b32_e32 v1, s51
141; GCN-NEXT:    buffer_store_dword v1, v62, s[0:3], 0 offen
142; GCN-NEXT:    s_waitcnt lgkmcnt(0)
143; GCN-NEXT:    v_mov_b32_e32 v1, s12
144; GCN-NEXT:    v_add_u32_e32 v63, 0xc0, v16
145; GCN-NEXT:    buffer_store_dword v1, v63, s[0:3], 0 offen
146; GCN-NEXT:    v_mov_b32_e32 v1, s13
147; GCN-NEXT:    v_add_u32_e32 v64, 0xc4, v16
148; GCN-NEXT:    buffer_store_dword v1, v64, s[0:3], 0 offen
149; GCN-NEXT:    v_mov_b32_e32 v1, s14
150; GCN-NEXT:    v_add_u32_e32 v65, 0xc8, v16
151; GCN-NEXT:    buffer_store_dword v1, v65, s[0:3], 0 offen
152; GCN-NEXT:    s_movk_i32 s12, 0xd0
153; GCN-NEXT:    v_add_u32_e32 v66, 0xcc, v16
154; GCN-NEXT:    v_mov_b32_e32 v1, s15
155; GCN-NEXT:    buffer_store_dword v1, v66, s[0:3], 0 offen
156; GCN-NEXT:    v_add_u32_e32 v67, s12, v16
157; GCN-NEXT:    v_mov_b32_e32 v1, s16
158; GCN-NEXT:    buffer_store_dword v1, v67, s[0:3], 0 offen
159; GCN-NEXT:    v_add_u32_e32 v68, 0xd4, v16
160; GCN-NEXT:    v_mov_b32_e32 v1, s17
161; GCN-NEXT:    buffer_store_dword v1, v68, s[0:3], 0 offen
162; GCN-NEXT:    v_add_u32_e32 v69, 0xd8, v16
163; GCN-NEXT:    v_mov_b32_e32 v1, s18
164; GCN-NEXT:    buffer_store_dword v1, v69, s[0:3], 0 offen
165; GCN-NEXT:    s_movk_i32 s13, 0xe0
166; GCN-NEXT:    v_add_u32_e32 v70, 0xdc, v16
167; GCN-NEXT:    v_mov_b32_e32 v1, s19
168; GCN-NEXT:    buffer_store_dword v1, v70, s[0:3], 0 offen
169; GCN-NEXT:    v_add_u32_e32 v71, s13, v16
170; GCN-NEXT:    v_mov_b32_e32 v1, s20
171; GCN-NEXT:    buffer_store_dword v1, v71, s[0:3], 0 offen
172; GCN-NEXT:    v_add_u32_e32 v72, 0xe4, v16
173; GCN-NEXT:    v_mov_b32_e32 v1, s21
174; GCN-NEXT:    buffer_store_dword v1, v72, s[0:3], 0 offen
175; GCN-NEXT:    v_add_u32_e32 v73, 0xe8, v16
176; GCN-NEXT:    v_mov_b32_e32 v1, s22
177; GCN-NEXT:    buffer_store_dword v1, v73, s[0:3], 0 offen
178; GCN-NEXT:    s_movk_i32 s14, 0xf0
179; GCN-NEXT:    v_add_u32_e32 v74, 0xec, v16
180; GCN-NEXT:    v_mov_b32_e32 v1, s23
181; GCN-NEXT:    buffer_store_dword v1, v74, s[0:3], 0 offen
182; GCN-NEXT:    v_add_u32_e32 v75, s14, v16
183; GCN-NEXT:    v_mov_b32_e32 v1, s24
184; GCN-NEXT:    buffer_store_dword v1, v75, s[0:3], 0 offen
185; GCN-NEXT:    v_add_u32_e32 v76, 0xf4, v16
186; GCN-NEXT:    v_mov_b32_e32 v1, s25
187; GCN-NEXT:    s_and_b32 s7, s7, 63
188; GCN-NEXT:    buffer_store_dword v1, v76, s[0:3], 0 offen
189; GCN-NEXT:    v_add_u32_e32 v77, 0xf8, v16
190; GCN-NEXT:    v_mov_b32_e32 v1, s26
191; GCN-NEXT:    v_add_u32_e32 v17, 8, v16
192; GCN-NEXT:    buffer_store_dword v1, v77, s[0:3], 0 offen
193; GCN-NEXT:    v_add_u32_e32 v78, 0xfc, v16
194; GCN-NEXT:    v_mov_b32_e32 v1, s27
195; GCN-NEXT:    s_lshl_b32 s7, s7, 2
196; GCN-NEXT:    buffer_store_dword v2, v17, s[0:3], 0 offen
197; GCN-NEXT:    v_add_u32_e32 v18, 12, v16
198; GCN-NEXT:    v_add_u32_e32 v19, 16, v16
199; GCN-NEXT:    v_add_u32_e32 v20, 20, v16
200; GCN-NEXT:    v_add_u32_e32 v21, 24, v16
201; GCN-NEXT:    v_add_u32_e32 v22, 28, v16
202; GCN-NEXT:    v_add_u32_e32 v23, 32, v16
203; GCN-NEXT:    v_add_u32_e32 v24, 36, v16
204; GCN-NEXT:    v_add_u32_e32 v25, 40, v16
205; GCN-NEXT:    v_add_u32_e32 v26, 44, v16
206; GCN-NEXT:    v_add_u32_e32 v27, 48, v16
207; GCN-NEXT:    v_add_u32_e32 v28, 52, v16
208; GCN-NEXT:    v_add_u32_e32 v29, 56, v16
209; GCN-NEXT:    v_add_u32_e32 v30, 60, v16
210; GCN-NEXT:    buffer_store_dword v1, v78, s[0:3], 0 offen
211; GCN-NEXT:    v_mov_b32_e32 v2, s6
212; GCN-NEXT:    v_add_u32_e32 v1, s7, v16
213; GCN-NEXT:    buffer_store_dword v3, v18, s[0:3], 0 offen
214; GCN-NEXT:    buffer_store_dword v4, v19, s[0:3], 0 offen
215; GCN-NEXT:    buffer_store_dword v5, v20, s[0:3], 0 offen
216; GCN-NEXT:    buffer_store_dword v6, v21, s[0:3], 0 offen
217; GCN-NEXT:    buffer_store_dword v7, v22, s[0:3], 0 offen
218; GCN-NEXT:    buffer_store_dword v8, v23, s[0:3], 0 offen
219; GCN-NEXT:    buffer_store_dword v9, v24, s[0:3], 0 offen
220; GCN-NEXT:    buffer_store_dword v10, v25, s[0:3], 0 offen
221; GCN-NEXT:    buffer_store_dword v11, v26, s[0:3], 0 offen
222; GCN-NEXT:    buffer_store_dword v12, v27, s[0:3], 0 offen
223; GCN-NEXT:    buffer_store_dword v13, v28, s[0:3], 0 offen
224; GCN-NEXT:    buffer_store_dword v14, v29, s[0:3], 0 offen
225; GCN-NEXT:    buffer_store_dword v15, v30, s[0:3], 0 offen
226; GCN-NEXT:    buffer_store_dword v2, v1, s[0:3], 0 offen
227; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
228; GCN-NEXT:    buffer_load_dword v2, v17, s[0:3], 0 offen
229; GCN-NEXT:    buffer_load_dword v3, v18, s[0:3], 0 offen
230; GCN-NEXT:    buffer_load_dword v4, v19, s[0:3], 0 offen
231; GCN-NEXT:    buffer_load_dword v5, v20, s[0:3], 0 offen
232; GCN-NEXT:    buffer_load_dword v6, v21, s[0:3], 0 offen
233; GCN-NEXT:    buffer_load_dword v7, v22, s[0:3], 0 offen
234; GCN-NEXT:    buffer_load_dword v8, v23, s[0:3], 0 offen
235; GCN-NEXT:    buffer_load_dword v9, v24, s[0:3], 0 offen
236; GCN-NEXT:    buffer_load_dword v10, v25, s[0:3], 0 offen
237; GCN-NEXT:    buffer_load_dword v11, v26, s[0:3], 0 offen
238; GCN-NEXT:    buffer_load_dword v12, v27, s[0:3], 0 offen
239; GCN-NEXT:    buffer_load_dword v13, v28, s[0:3], 0 offen
240; GCN-NEXT:    buffer_load_dword v14, v29, s[0:3], 0 offen
241; GCN-NEXT:    buffer_load_dword v15, v30, s[0:3], 0 offen
242; GCN-NEXT:    buffer_load_dword v16, v31, s[0:3], 0 offen
243; GCN-NEXT:    buffer_load_dword v17, v32, s[0:3], 0 offen
244; GCN-NEXT:    buffer_load_dword v18, v33, s[0:3], 0 offen
245; GCN-NEXT:    buffer_load_dword v19, v34, s[0:3], 0 offen
246; GCN-NEXT:    buffer_load_dword v20, v35, s[0:3], 0 offen
247; GCN-NEXT:    buffer_load_dword v21, v36, s[0:3], 0 offen
248; GCN-NEXT:    buffer_load_dword v22, v37, s[0:3], 0 offen
249; GCN-NEXT:    buffer_load_dword v23, v38, s[0:3], 0 offen
250; GCN-NEXT:    buffer_load_dword v24, v39, s[0:3], 0 offen
251; GCN-NEXT:    buffer_load_dword v25, v40, s[0:3], 0 offen
252; GCN-NEXT:    buffer_load_dword v26, v41, s[0:3], 0 offen
253; GCN-NEXT:    buffer_load_dword v27, v42, s[0:3], 0 offen
254; GCN-NEXT:    buffer_load_dword v28, v43, s[0:3], 0 offen
255; GCN-NEXT:    buffer_load_dword v29, v44, s[0:3], 0 offen
256; GCN-NEXT:    buffer_load_dword v30, v45, s[0:3], 0 offen
257; GCN-NEXT:    buffer_load_dword v31, v46, s[0:3], 0 offen
258; GCN-NEXT:    buffer_load_dword v32, v47, s[0:3], 0 offen
259; GCN-NEXT:    buffer_load_dword v33, v48, s[0:3], 0 offen
260; GCN-NEXT:    buffer_load_dword v34, v49, s[0:3], 0 offen
261; GCN-NEXT:    buffer_load_dword v35, v50, s[0:3], 0 offen
262; GCN-NEXT:    buffer_load_dword v36, v51, s[0:3], 0 offen
263; GCN-NEXT:    buffer_load_dword v37, v52, s[0:3], 0 offen
264; GCN-NEXT:    buffer_load_dword v38, v53, s[0:3], 0 offen
265; GCN-NEXT:    buffer_load_dword v39, v54, s[0:3], 0 offen
266; GCN-NEXT:    buffer_load_dword v40, v55, s[0:3], 0 offen
267; GCN-NEXT:    buffer_load_dword v41, v56, s[0:3], 0 offen
268; GCN-NEXT:    buffer_load_dword v42, v57, s[0:3], 0 offen
269; GCN-NEXT:    buffer_load_dword v43, v58, s[0:3], 0 offen
270; GCN-NEXT:    buffer_load_dword v44, v59, s[0:3], 0 offen
271; GCN-NEXT:    buffer_load_dword v45, v60, s[0:3], 0 offen
272; GCN-NEXT:    buffer_load_dword v46, v61, s[0:3], 0 offen
273; GCN-NEXT:    buffer_load_dword v47, v62, s[0:3], 0 offen
274; GCN-NEXT:    buffer_load_dword v48, v63, s[0:3], 0 offen
275; GCN-NEXT:    buffer_load_dword v49, v64, s[0:3], 0 offen
276; GCN-NEXT:    buffer_load_dword v50, v65, s[0:3], 0 offen
277; GCN-NEXT:    buffer_load_dword v51, v66, s[0:3], 0 offen
278; GCN-NEXT:    buffer_load_dword v52, v67, s[0:3], 0 offen
279; GCN-NEXT:    buffer_load_dword v53, v68, s[0:3], 0 offen
280; GCN-NEXT:    buffer_load_dword v54, v69, s[0:3], 0 offen
281; GCN-NEXT:    buffer_load_dword v55, v70, s[0:3], 0 offen
282; GCN-NEXT:    buffer_load_dword v56, v71, s[0:3], 0 offen
283; GCN-NEXT:    buffer_load_dword v57, v72, s[0:3], 0 offen
284; GCN-NEXT:    buffer_load_dword v58, v73, s[0:3], 0 offen
285; GCN-NEXT:    buffer_load_dword v59, v74, s[0:3], 0 offen
286; GCN-NEXT:    buffer_load_dword v60, v75, s[0:3], 0 offen
287; GCN-NEXT:    buffer_load_dword v61, v76, s[0:3], 0 offen
288; GCN-NEXT:    buffer_load_dword v62, v77, s[0:3], 0 offen
289; GCN-NEXT:    buffer_load_dword v63, v78, s[0:3], 0 offen
290; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:256
291; GCN-NEXT:    v_mov_b32_e32 v65, s9
292; GCN-NEXT:    s_add_u32 s6, s8, 16
293; GCN-NEXT:    v_mov_b32_e32 v64, s8
294; GCN-NEXT:    s_addc_u32 s7, s9, 0
295; GCN-NEXT:    s_waitcnt vmcnt(0)
296; GCN-NEXT:    global_store_dwordx4 v[64:65], v[0:3], off
297; GCN-NEXT:    s_nop 0
298; GCN-NEXT:    v_mov_b32_e32 v0, s6
299; GCN-NEXT:    v_mov_b32_e32 v1, s7
300; GCN-NEXT:    s_add_u32 s6, s8, 32
301; GCN-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
302; GCN-NEXT:    s_addc_u32 s7, s9, 0
303; GCN-NEXT:    v_mov_b32_e32 v0, s6
304; GCN-NEXT:    v_mov_b32_e32 v1, s7
305; GCN-NEXT:    s_add_u32 s6, s8, 48
306; GCN-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
307; GCN-NEXT:    s_addc_u32 s7, s9, 0
308; GCN-NEXT:    v_mov_b32_e32 v0, s6
309; GCN-NEXT:    v_mov_b32_e32 v1, s7
310; GCN-NEXT:    s_add_u32 s6, s8, 64
311; GCN-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
312; GCN-NEXT:    s_addc_u32 s7, s9, 0
313; GCN-NEXT:    v_mov_b32_e32 v0, s6
314; GCN-NEXT:    v_mov_b32_e32 v1, s7
315; GCN-NEXT:    s_add_u32 s6, s8, s4
316; GCN-NEXT:    s_addc_u32 s7, s9, 0
317; GCN-NEXT:    global_store_dwordx4 v[0:1], v[16:19], off
318; GCN-NEXT:    v_mov_b32_e32 v0, s6
319; GCN-NEXT:    s_add_u32 s4, s8, s5
320; GCN-NEXT:    v_mov_b32_e32 v1, s7
321; GCN-NEXT:    global_store_dwordx4 v[0:1], v[20:23], off
322; GCN-NEXT:    s_addc_u32 s5, s9, 0
323; GCN-NEXT:    v_mov_b32_e32 v0, s4
324; GCN-NEXT:    v_mov_b32_e32 v1, s5
325; GCN-NEXT:    s_add_u32 s4, s8, s10
326; GCN-NEXT:    global_store_dwordx4 v[0:1], v[24:27], off
327; GCN-NEXT:    s_addc_u32 s5, s9, 0
328; GCN-NEXT:    v_mov_b32_e32 v0, s4
329; GCN-NEXT:    v_mov_b32_e32 v1, s5
330; GCN-NEXT:    s_add_u32 s4, s8, 0x80
331; GCN-NEXT:    global_store_dwordx4 v[0:1], v[28:31], off
332; GCN-NEXT:    s_addc_u32 s5, s9, 0
333; GCN-NEXT:    v_mov_b32_e32 v0, s4
334; GCN-NEXT:    v_mov_b32_e32 v1, s5
335; GCN-NEXT:    s_add_u32 s4, s8, s11
336; GCN-NEXT:    global_store_dwordx4 v[0:1], v[32:35], off
337; GCN-NEXT:    s_addc_u32 s5, s9, 0
338; GCN-NEXT:    v_mov_b32_e32 v0, s4
339; GCN-NEXT:    v_mov_b32_e32 v1, s5
340; GCN-NEXT:    s_add_u32 s4, s8, s28
341; GCN-NEXT:    global_store_dwordx4 v[0:1], v[36:39], off
342; GCN-NEXT:    s_addc_u32 s5, s9, 0
343; GCN-NEXT:    v_mov_b32_e32 v0, s4
344; GCN-NEXT:    v_mov_b32_e32 v1, s5
345; GCN-NEXT:    s_add_u32 s4, s8, s29
346; GCN-NEXT:    global_store_dwordx4 v[0:1], v[40:43], off
347; GCN-NEXT:    s_addc_u32 s5, s9, 0
348; GCN-NEXT:    v_mov_b32_e32 v0, s4
349; GCN-NEXT:    v_mov_b32_e32 v1, s5
350; GCN-NEXT:    s_add_u32 s4, s8, 0xc0
351; GCN-NEXT:    global_store_dwordx4 v[0:1], v[44:47], off
352; GCN-NEXT:    s_addc_u32 s5, s9, 0
353; GCN-NEXT:    v_mov_b32_e32 v0, s4
354; GCN-NEXT:    v_mov_b32_e32 v1, s5
355; GCN-NEXT:    s_add_u32 s4, s8, s12
356; GCN-NEXT:    global_store_dwordx4 v[0:1], v[48:51], off
357; GCN-NEXT:    s_addc_u32 s5, s9, 0
358; GCN-NEXT:    v_mov_b32_e32 v0, s4
359; GCN-NEXT:    v_mov_b32_e32 v1, s5
360; GCN-NEXT:    s_add_u32 s4, s8, s13
361; GCN-NEXT:    global_store_dwordx4 v[0:1], v[52:55], off
362; GCN-NEXT:    s_addc_u32 s5, s9, 0
363; GCN-NEXT:    v_mov_b32_e32 v0, s4
364; GCN-NEXT:    v_mov_b32_e32 v1, s5
365; GCN-NEXT:    s_add_u32 s4, s8, s14
366; GCN-NEXT:    global_store_dwordx4 v[0:1], v[56:59], off
367; GCN-NEXT:    s_addc_u32 s5, s9, 0
368; GCN-NEXT:    v_mov_b32_e32 v0, s4
369; GCN-NEXT:    v_mov_b32_e32 v1, s5
370; GCN-NEXT:    global_store_dwordx4 v[0:1], v[60:63], off
371; GCN-NEXT:    s_endpgm
; IR under test: load the whole vector, insert %val at the variable index
; %idx, then store the updated vector to the output pointer.
372  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
373  %insert = insertelement <64 x i32> %vec, i32 %val, i32 %idx
374  store <64 x i32> %insert, <64 x i32> addrspace(1)* %out.ptr
375  ret void
376}
377
; Attribute group #0, referenced by @v_insert_v64i32_varidx. It sets the
; "amdgpu-waves-per-eu" function attribute to "1,10" (per the AMDGPU backend
; documentation this is a minimum,maximum waves-per-execution-unit request;
; confirm against the LLVM version in use).
378attributes #0 = { "amdgpu-waves-per-eu"="1,10" }
379