1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
3
; Packs four i32 arguments into two i64 values, (a << 32) | b and (c << 32) | d,
; places them in lanes 0 and 1 of a <2 x i64>, and returns that vector bitcast
; to <4 x i32>. The checks expect the whole sequence to become two paired-lane
; vmov instructions.
define arm_aapcs_vfpcc <4 x i32> @vcreate_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: vcreate_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r2
; CHECK-NEXT:    bx lr
entry:
  ; First i64 lane: a in the upper 32 bits, b in the lower 32 bits.
  %a.wide = zext i32 %a to i64
  %a.hi = shl nuw i64 %a.wide, 32
  %b.wide = zext i32 %b to i64
  %ab = or i64 %a.hi, %b.wide
  %vec.lo = insertelement <2 x i64> undef, i64 %ab, i64 0
  ; Second i64 lane: c in the upper 32 bits, d in the lower 32 bits.
  %c.wide = zext i32 %c to i64
  %c.hi = shl nuw i64 %c.wide, 32
  %d.wide = zext i32 %d to i64
  %cd = or i64 %c.hi, %d.wide
  %vec.full = insertelement <2 x i64> %vec.lo, i64 %cd, i64 1
  %res = bitcast <2 x i64> %vec.full to <4 x i32>
  ret <4 x i32> %res
}
24
; Fills all four i32 lanes in ascending order (a->0, b->1, c->2, d->3).
; The checks expect two paired-lane vmov instructions.
define arm_aapcs_vfpcc <4 x i32> @insert_0123(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r3
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 0
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 1
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 2
  %ins.d = insertelement <4 x i32> %ins.c, i32 %d, i32 3
  ret <4 x i32> %ins.d
}
38
; Fills all four i32 lanes in descending order (a->3, b->2, c->1, d->0).
; The checks expect two paired-lane vmov instructions.
define arm_aapcs_vfpcc <4 x i32> @insert_3210(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_3210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 3
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 2
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 1
  %ins.d = insertelement <4 x i32> %ins.c, i32 %d, i32 0
  ret <4 x i32> %ins.d
}
52
; Fills all four i32 lanes in the order 0, 2, 1, 3 (a->0, b->2, c->1, d->3).
; The checks expect two paired-lane vmov instructions.
define arm_aapcs_vfpcc <4 x i32> @insert_0213(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 0
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 2
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 1
  %ins.d = insertelement <4 x i32> %ins.c, i32 %d, i32 3
  ret <4 x i32> %ins.d
}
66
; Writes lanes 0 and 2 twice each: a->0, b->2, then c->2 and d->0 overwrite the
; earlier inserts. Lanes 1 and 3 are never written. The checks expect a single
; paired-lane vmov that uses only the last-written values (r3 and r2).
define arm_aapcs_vfpcc <4 x i32> @insert_0220(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0220:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 0
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 2
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 2
  %ins.d = insertelement <4 x i32> %ins.c, i32 %d, i32 0
  ret <4 x i32> %ins.d
}
79
; Writes lanes 3, 2 and 1 (a->3, b->2, c->1); lane 0 is never written.
; The checks expect one single-lane vmov.32 plus one paired-lane vmov.
define arm_aapcs_vfpcc <4 x i32> @insert_321(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_321:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[2], r1
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 3
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 2
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 1
  ret <4 x i32> %ins.c
}
92
; Writes lanes 3, 1 and 0 (a->3, b->1, c->0); lane 2 is never written.
; The checks expect one single-lane vmov.32 plus one paired-lane vmov.
define arm_aapcs_vfpcc <4 x i32> @insert_310(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_310:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r2
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 3
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 1
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 0
  ret <4 x i32> %ins.c
}
105
; Writes lanes 3, 2 and 0 (a->3, b->2, c->0); lane 1 is never written.
; The checks expect one paired-lane vmov plus one single-lane vmov.32.
define arm_aapcs_vfpcc <4 x i32> @insert_320(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_320:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r1
; CHECK-NEXT:    vmov.32 q0[3], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 3
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 2
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 0
  ret <4 x i32> %ins.c
}
118
; Writes only lanes 3 and 1 (a->3, b->1).
; The checks expect a single paired-lane vmov covering both lanes.
define arm_aapcs_vfpcc <4 x i32> @insert_31(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 3
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 1
  ret <4 x i32> %ins.b
}
129
; Writes only lanes 3 and 2 (a->3, b->2).
; The checks expect two separate single-lane vmov.32 instructions.
define arm_aapcs_vfpcc <4 x i32> @insert_32(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[2], r1
; CHECK-NEXT:    vmov.32 q0[3], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 3
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 2
  ret <4 x i32> %ins.b
}
141
; Writes only lane 3 (a->3). The checks expect one single-lane vmov.32.
define arm_aapcs_vfpcc <4 x i32> @insert_3(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[3], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 3
  ret <4 x i32> %ins.a
}
151
; Writes lanes 2, 1 and 0 (a->2, b->1, c->0); lane 3 is never written.
; The checks expect one single-lane vmov.32 plus one paired-lane vmov.
define arm_aapcs_vfpcc <4 x i32> @insert_210(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 2
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 1
  %ins.c = insertelement <4 x i32> %ins.b, i32 %c, i32 0
  ret <4 x i32> %ins.c
}
164
; Writes only lanes 2 and 0 (a->2, b->0).
; The checks expect a single paired-lane vmov covering both lanes.
define arm_aapcs_vfpcc <4 x i32> @insert_20(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_20:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 2
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 0
  ret <4 x i32> %ins.b
}
175
; Writes only lanes 2 and 1 (a->2, b->1).
; The checks expect two separate single-lane vmov.32 instructions.
define arm_aapcs_vfpcc <4 x i32> @insert_21(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_21:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[1], r1
; CHECK-NEXT:    vmov.32 q0[2], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 2
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 1
  ret <4 x i32> %ins.b
}
187
; Writes only lane 2 (a->2). The checks expect one single-lane vmov.32.
define arm_aapcs_vfpcc <4 x i32> @insert_2(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[2], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 2
  ret <4 x i32> %ins.a
}
197
; Writes only lanes 1 and 0 (a->1, b->0).
; The checks expect two separate single-lane vmov.32 instructions.
define arm_aapcs_vfpcc <4 x i32> @insert_10(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_10:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r1
; CHECK-NEXT:    vmov.32 q0[1], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 1
  %ins.b = insertelement <4 x i32> %ins.a, i32 %b, i32 0
  ret <4 x i32> %ins.b
}
209
; Writes only lane 1 (a->1). The checks expect one single-lane vmov.32.
define arm_aapcs_vfpcc <4 x i32> @insert_1(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[1], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 1
  ret <4 x i32> %ins.a
}
219
; Writes only lane 0 (a->0). The checks expect one single-lane vmov.32.
define arm_aapcs_vfpcc <4 x i32> @insert_0(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.32 q0[0], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %ins.a
}
229
; Packs eight zero-extended i16 arguments into two i64 values using shifts by
; 48, 32, 16 and 0, inserts them as the two lanes of a <2 x i64>, and returns
; the vector bitcast to <8 x i16>. Unlike the arm_aapcs_vfpcc tests in this
; file, this function is declared without that calling convention; the checks
; expect the value to be assembled with scalar shift/or instructions in core
; registers.
; NOTE(review): attribute group #0 is referenced here, but its definition is
; not visible in this part of the file.
define hidden <8 x i16> @create_i16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d, i16 zeroext %a2, i16 zeroext %b2, i16 zeroext %c2, i16 zeroext %d2) local_unnamed_addr #0 {
; CHECK-LABEL: create_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    movs r7, #0
; CHECK-NEXT:    orr.w r0, r1, r0, lsl #16
; CHECK-NEXT:    lsll r2, r7, #16
; CHECK-NEXT:    ldr r4, [sp, #24]
; CHECK-NEXT:    orr.w r1, r0, r7
; CHECK-NEXT:    ldrd r0, r7, [sp, #16]
; CHECK-NEXT:    movs r5, #0
; CHECK-NEXT:    lsll r4, r5, #16
; CHECK-NEXT:    orr.w r0, r7, r0, lsl #16
; CHECK-NEXT:    orr.w r12, r0, r5
; CHECK-NEXT:    orr.w r0, r2, r3
; CHECK-NEXT:    ldr r2, [sp, #28]
; CHECK-NEXT:    mov r3, r12
; CHECK-NEXT:    orrs r2, r4
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  ; First i64 lane: (a << 48) | (b << 32) | (c << 16) | d.
  %a.ext = zext i16 %a to i64
  %a.sh = shl nuw i64 %a.ext, 48
  %b.ext = zext i16 %b to i64
  %b.sh = shl nuw nsw i64 %b.ext, 32
  %lo.ab = or i64 %b.sh, %a.sh
  %c.ext = zext i16 %c to i64
  %c.sh = shl nuw nsw i64 %c.ext, 16
  %lo.abc = or i64 %lo.ab, %c.sh
  %d.ext = zext i16 %d to i64
  %lo = or i64 %lo.abc, %d.ext
  %vec.lo = insertelement <2 x i64> undef, i64 %lo, i64 0
  ; Second i64 lane: (a2 << 48) | (b2 << 32) | (c2 << 16) | d2.
  %a2.ext = zext i16 %a2 to i64
  %a2.sh = shl nuw i64 %a2.ext, 48
  %b2.ext = zext i16 %b2 to i64
  %b2.sh = shl nuw nsw i64 %b2.ext, 32
  %hi.ab = or i64 %b2.sh, %a2.sh
  %c2.ext = zext i16 %c2 to i64
  %c2.sh = shl nuw nsw i64 %c2.ext, 16
  %hi.abc = or i64 %hi.ab, %c2.sh
  %d2.ext = zext i16 %d2 to i64
  %hi = or i64 %hi.abc, %d2.ext
  %vec.full = insertelement <2 x i64> %vec.lo, i64 %hi, i64 1
  %res = bitcast <2 x i64> %vec.full to <8 x i16>
  ret <8 x i16> %res
}
276
; Fills all eight i16 lanes in ascending order from eight scalar arguments.
; The checks expect one vmov.16 per lane, with the last four arguments loaded
; from the stack via ldrh.w.
define arm_aapcs_vfpcc <8 x i16> @insert_01234567(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
; CHECK-LABEL: insert_01234567:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    ldrh.w r0, [sp]
; CHECK-NEXT:    vmov.16 q0[1], r1
; CHECK-NEXT:    vmov.16 q0[2], r2
; CHECK-NEXT:    vmov.16 q0[3], r3
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    ldrh.w r0, [sp, #4]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    ldrh.w r0, [sp, #8]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    ldrh.w r0, [sp, #12]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a0 = insertelement <8 x i16> undef, i16 %a0, i32 0
  %ins.a1 = insertelement <8 x i16> %ins.a0, i16 %a1, i32 1
  %ins.a2 = insertelement <8 x i16> %ins.a1, i16 %a2, i32 2
  %ins.a3 = insertelement <8 x i16> %ins.a2, i16 %a3, i32 3
  %ins.a4 = insertelement <8 x i16> %ins.a3, i16 %a4, i32 4
  %ins.a5 = insertelement <8 x i16> %ins.a4, i16 %a5, i32 5
  %ins.a6 = insertelement <8 x i16> %ins.a5, i16 %a6, i32 6
  %ins.a7 = insertelement <8 x i16> %ins.a6, i16 %a7, i32 7
  ret <8 x i16> %ins.a7
}
304
; Packs sixteen zero-extended i8 arguments into two i64 values with shift/or
; chains, inserts them as the two lanes of a <2 x i64>, and returns the vector
; bitcast to <16 x i8>. Unlike the arm_aapcs_vfpcc tests in this file, this
; function is declared without that calling convention; the checks expect the
; value to be assembled with scalar shift/or/add instructions in core registers.
; NOTE(review): the first byte of each half (%a1, %a3) is shifted by 54, while
; the remaining bytes step down 48, 40, ... in multiples of 8. The CHECK lines
; (lsl #22) match the 54, so changing it requires regenerating the checks.
; NOTE(review): attribute group #0 is referenced here, but its definition is
; not visible in this part of the file.
define hidden <16 x i8> @create_i8(i8 zeroext %a1, i8 zeroext %b1, i8 zeroext %c1, i8 zeroext %d1, i8 zeroext %a2, i8 zeroext %b2, i8 zeroext %c2, i8 zeroext %d2, i8 zeroext %a3, i8 zeroext %b3, i8 zeroext %c3, i8 zeroext %d3, i8 zeroext %a4, i8 zeroext %b4, i8 zeroext %c4, i8 zeroext %d4) local_unnamed_addr #0 {
; CHECK-LABEL: create_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    ldr r4, [sp, #40]
; CHECK-NEXT:    mov.w r11, #0
; CHECK-NEXT:    ldr r6, [sp, #36]
; CHECK-NEXT:    movs r7, #0
; CHECK-NEXT:    lsll r4, r11, #16
; CHECK-NEXT:    mov r10, r1
; CHECK-NEXT:    lsll r6, r7, #24
; CHECK-NEXT:    mov r8, r3
; CHECK-NEXT:    orr.w r1, r6, r4
; CHECK-NEXT:    ldr r6, [sp, #44]
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    ldr r4, [sp, #72]
; CHECK-NEXT:    lsll r6, r3, #8
; CHECK-NEXT:    movs r5, #0
; CHECK-NEXT:    orrs r1, r6
; CHECK-NEXT:    ldr r6, [sp, #48]
; CHECK-NEXT:    lsll r4, r5, #16
; CHECK-NEXT:    mov.w r9, #0
; CHECK-NEXT:    orr.w r12, r1, r6
; CHECK-NEXT:    ldr r6, [sp, #68]
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    lsll r6, r1, #24
; CHECK-NEXT:    orrs r6, r4
; CHECK-NEXT:    ldr r4, [sp, #76]
; CHECK-NEXT:    lsll r4, r9, #8
; CHECK-NEXT:    orrs r6, r4
; CHECK-NEXT:    ldr r4, [sp, #80]
; CHECK-NEXT:    orr.w lr, r6, r4
; CHECK-NEXT:    lsl.w r4, r10, #16
; CHECK-NEXT:    orr.w r0, r4, r0, lsl #22
; CHECK-NEXT:    orr.w r0, r0, r2, lsl #8
; CHECK-NEXT:    add r0, r8
; CHECK-NEXT:    orrs r0, r7
; CHECK-NEXT:    orr.w r0, r0, r11
; CHECK-NEXT:    orr.w r2, r0, r3
; CHECK-NEXT:    ldr r0, [sp, #56]
; CHECK-NEXT:    ldr r3, [sp, #52]
; CHECK-NEXT:    lsls r0, r0, #16
; CHECK-NEXT:    orr.w r0, r0, r3, lsl #22
; CHECK-NEXT:    ldr r3, [sp, #60]
; CHECK-NEXT:    orr.w r0, r0, r3, lsl #8
; CHECK-NEXT:    ldr r3, [sp, #64]
; CHECK-NEXT:    add r0, r3
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    mov r1, r2
; CHECK-NEXT:    orrs r0, r5
; CHECK-NEXT:    mov r2, lr
; CHECK-NEXT:    orr.w r3, r0, r9
; CHECK-NEXT:    mov r0, r12
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
  ; First i64 lane, built from %a1..%d1 and %a2..%d2.
  %a1.ext = zext i8 %a1 to i64
  %a1.sh = shl nuw nsw i64 %a1.ext, 54
  %b1.ext = zext i8 %b1 to i64
  %b1.sh = shl nuw nsw i64 %b1.ext, 48
  %lo.acc1 = or i64 %b1.sh, %a1.sh
  %c1.ext = zext i8 %c1 to i64
  %c1.sh = shl nuw nsw i64 %c1.ext, 40
  %lo.acc2 = or i64 %lo.acc1, %c1.sh
  %d1.ext = zext i8 %d1 to i64
  %d1.sh = shl nuw nsw i64 %d1.ext, 32
  %lo.acc3 = or i64 %lo.acc2, %d1.sh
  %a2.ext = zext i8 %a2 to i64
  %a2.sh = shl nuw nsw i64 %a2.ext, 24
  %lo.acc4 = or i64 %lo.acc3, %a2.sh
  %b2.ext = zext i8 %b2 to i64
  %b2.sh = shl nuw nsw i64 %b2.ext, 16
  %lo.acc5 = or i64 %lo.acc4, %b2.sh
  %c2.ext = zext i8 %c2 to i64
  %c2.sh = shl nuw nsw i64 %c2.ext, 8
  %lo.acc6 = or i64 %lo.acc5, %c2.sh
  %d2.ext = zext i8 %d2 to i64
  %lo = or i64 %lo.acc6, %d2.ext
  %vec.lo = insertelement <2 x i64> undef, i64 %lo, i64 0
  ; Second i64 lane, built from %a3..%d3 and %a4..%d4.
  %a3.ext = zext i8 %a3 to i64
  %a3.sh = shl nuw nsw i64 %a3.ext, 54
  %b3.ext = zext i8 %b3 to i64
  %b3.sh = shl nuw nsw i64 %b3.ext, 48
  %hi.acc1 = or i64 %b3.sh, %a3.sh
  %c3.ext = zext i8 %c3 to i64
  %c3.sh = shl nuw nsw i64 %c3.ext, 40
  %hi.acc2 = or i64 %hi.acc1, %c3.sh
  %d3.ext = zext i8 %d3 to i64
  %d3.sh = shl nuw nsw i64 %d3.ext, 32
  %hi.acc3 = or i64 %hi.acc2, %d3.sh
  %a4.ext = zext i8 %a4 to i64
  %a4.sh = shl nuw nsw i64 %a4.ext, 24
  %hi.acc4 = or i64 %hi.acc3, %a4.sh
  %b4.ext = zext i8 %b4 to i64
  %b4.sh = shl nuw nsw i64 %b4.ext, 16
  %hi.acc5 = or i64 %hi.acc4, %b4.sh
  %c4.ext = zext i8 %c4 to i64
  %c4.sh = shl nuw nsw i64 %c4.ext, 8
  %hi.acc6 = or i64 %hi.acc5, %c4.sh
  %d4.ext = zext i8 %d4 to i64
  %hi = or i64 %hi.acc6, %d4.ext
  %vec.full = insertelement <2 x i64> %vec.lo, i64 %hi, i64 1
  %res = bitcast <2 x i64> %vec.full to <16 x i8>
  ret <16 x i8> %res
}
410
; Fills all sixteen i8 lanes in ascending order from sixteen scalar arguments.
; The checks expect one vmov.8 per lane, with the last twelve arguments loaded
; from the stack via ldrb.w.
define arm_aapcs_vfpcc <16 x i8> @insert_0123456789101112131415(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
; CHECK-LABEL: insert_0123456789101112131415:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    ldrb.w r0, [sp]
; CHECK-NEXT:    vmov.8 q0[1], r1
; CHECK-NEXT:    vmov.8 q0[2], r2
; CHECK-NEXT:    vmov.8 q0[3], r3
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #4]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #8]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #12]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #16]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #20]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #24]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #28]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #32]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #36]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #40]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    ldrb.w r0, [sp, #44]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %ins.a0 = insertelement <16 x i8> undef, i8 %a0, i32 0
  %ins.a1 = insertelement <16 x i8> %ins.a0, i8 %a1, i32 1
  %ins.a2 = insertelement <16 x i8> %ins.a1, i8 %a2, i32 2
  %ins.a3 = insertelement <16 x i8> %ins.a2, i8 %a3, i32 3
  %ins.a4 = insertelement <16 x i8> %ins.a3, i8 %a4, i32 4
  %ins.a5 = insertelement <16 x i8> %ins.a4, i8 %a5, i32 5
  %ins.a6 = insertelement <16 x i8> %ins.a5, i8 %a6, i32 6
  %ins.a7 = insertelement <16 x i8> %ins.a6, i8 %a7, i32 7
  %ins.a8 = insertelement <16 x i8> %ins.a7, i8 %a8, i32 8
  %ins.a9 = insertelement <16 x i8> %ins.a8, i8 %a9, i32 9
  %ins.a10 = insertelement <16 x i8> %ins.a9, i8 %a10, i32 10
  %ins.a11 = insertelement <16 x i8> %ins.a10, i8 %a11, i32 11
  %ins.a12 = insertelement <16 x i8> %ins.a11, i8 %a12, i32 12
  %ins.a13 = insertelement <16 x i8> %ins.a12, i8 %a13, i32 13
  %ins.a14 = insertelement <16 x i8> %ins.a13, i8 %a14, i32 14
  %ins.a15 = insertelement <16 x i8> %ins.a14, i8 %a15, i32 15
  ret <16 x i8> %ins.a15
}
462