1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
4
; Loads <4 x i32> (align 4) from %x + 4 bytes, stores it to %y and returns the
; offset pointer %z.
; Expected output: the +4 is folded into a pre-indexed vldrw.u32 with
; writeback (`[r0, #4]!`).
5define i8* @ldrwu32_4(i8* %x, i8* %y) {
6; CHECK-LABEL: ldrwu32_4:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    vldrw.u32 q0, [r0, #4]!
9; CHECK-NEXT:    vstrw.32 q0, [r1]
10; CHECK-NEXT:    bx lr
11entry:
12  %z = getelementptr inbounds i8, i8* %x, i32 4
13  %0 = bitcast i8* %z to <4 x i32>*
14  %1 = load <4 x i32>, <4 x i32>* %0, align 4
15  %2 = bitcast i8* %y to <4 x i32>*
16  store <4 x i32> %1, <4 x i32>* %2, align 4
17  ret i8* %z
18}
19
; Loads <4 x i32> (align 4) from %x + 3 bytes, stores it to %y and returns %z.
; Expected output differs per run: the little-endian run folds the +3 into a
; pre-indexed byte load (vldrb.u8 `[r0, #3]!`), while the big-endian run uses
; a separate `adds` followed by an unindexed vldrw.u32.
; NOTE(review): presumably 3 is not encodable as a word-form offset, and only
; little-endian may substitute the byte form - confirm against the ISA docs.
20define i8* @ldrwu32_3(i8* %x, i8* %y) {
21; CHECK-LE-LABEL: ldrwu32_3:
22; CHECK-LE:       @ %bb.0: @ %entry
23; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
24; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
25; CHECK-LE-NEXT:    bx lr
26;
27; CHECK-BE-LABEL: ldrwu32_3:
28; CHECK-BE:       @ %bb.0: @ %entry
29; CHECK-BE-NEXT:    adds r0, #3
30; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
31; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
32; CHECK-BE-NEXT:    bx lr
33entry:
34  %z = getelementptr inbounds i8, i8* %x, i32 3
35  %0 = bitcast i8* %z to <4 x i32>*
36  %1 = load <4 x i32>, <4 x i32>* %0, align 4
37  %2 = bitcast i8* %y to <4 x i32>*
38  store <4 x i32> %1, <4 x i32>* %2, align 4
39  ret i8* %z
40}
41
; Loads <4 x i32> (align 4) from %x - 4 bytes, stores it to %y and returns %z.
; Expected output: the negative offset is folded into a pre-indexed vldrw.u32
; with writeback (`[r0, #-4]!`).
42define i8* @ldrwu32_m4(i8* %x, i8* %y) {
43; CHECK-LABEL: ldrwu32_m4:
44; CHECK:       @ %bb.0: @ %entry
45; CHECK-NEXT:    vldrw.u32 q0, [r0, #-4]!
46; CHECK-NEXT:    vstrw.32 q0, [r1]
47; CHECK-NEXT:    bx lr
48entry:
49  %z = getelementptr inbounds i8, i8* %x, i32 -4
50  %0 = bitcast i8* %z to <4 x i32>*
51  %1 = load <4 x i32>, <4 x i32>* %0, align 4
52  %2 = bitcast i8* %y to <4 x i32>*
53  store <4 x i32> %1, <4 x i32>* %2, align 4
54  ret i8* %z
55}
56
; Loads <4 x i32> (align 4) from %x + 508 bytes, stores it to %y and returns %z.
; Expected output: the +508 is still folded into a pre-indexed vldrw.u32
; (`[r0, #508]!`); compare with the +512 case, which is not folded.
57define i8* @ldrwu32_508(i8* %x, i8* %y) {
58; CHECK-LABEL: ldrwu32_508:
59; CHECK:       @ %bb.0: @ %entry
60; CHECK-NEXT:    vldrw.u32 q0, [r0, #508]!
61; CHECK-NEXT:    vstrw.32 q0, [r1]
62; CHECK-NEXT:    bx lr
63entry:
64  %z = getelementptr inbounds i8, i8* %x, i32 508
65  %0 = bitcast i8* %z to <4 x i32>*
66  %1 = load <4 x i32>, <4 x i32>* %0, align 4
67  %2 = bitcast i8* %y to <4 x i32>*
68  store <4 x i32> %1, <4 x i32>* %2, align 4
69  ret i8* %z
70}
71
; Loads <4 x i32> (align 4) from %x + 512 bytes, stores it to %y and returns %z.
; Expected output: the offset is NOT folded; a separate `add.w` updates r0
; before an unindexed vldrw.u32.
; NOTE(review): presumably 512 is just past the encodable immediate range of
; the word-form load - confirm against the ISA reference.
72define i8* @ldrwu32_512(i8* %x, i8* %y) {
73; CHECK-LABEL: ldrwu32_512:
74; CHECK:       @ %bb.0: @ %entry
75; CHECK-NEXT:    add.w r0, r0, #512
76; CHECK-NEXT:    vldrw.u32 q0, [r0]
77; CHECK-NEXT:    vstrw.32 q0, [r1]
78; CHECK-NEXT:    bx lr
79entry:
80  %z = getelementptr inbounds i8, i8* %x, i32 512
81  %0 = bitcast i8* %z to <4 x i32>*
82  %1 = load <4 x i32>, <4 x i32>* %0, align 4
83  %2 = bitcast i8* %y to <4 x i32>*
84  store <4 x i32> %1, <4 x i32>* %2, align 4
85  ret i8* %z
86}
87
; Loads <4 x i32> (align 4) from %x - 508 bytes, stores it to %y and returns %z.
; Expected output: the -508 is still folded into a pre-indexed vldrw.u32
; (`[r0, #-508]!`); compare with the -512 case, which is not folded.
88define i8* @ldrwu32_m508(i8* %x, i8* %y) {
89; CHECK-LABEL: ldrwu32_m508:
90; CHECK:       @ %bb.0: @ %entry
91; CHECK-NEXT:    vldrw.u32 q0, [r0, #-508]!
92; CHECK-NEXT:    vstrw.32 q0, [r1]
93; CHECK-NEXT:    bx lr
94entry:
95  %z = getelementptr inbounds i8, i8* %x, i32 -508
96  %0 = bitcast i8* %z to <4 x i32>*
97  %1 = load <4 x i32>, <4 x i32>* %0, align 4
98  %2 = bitcast i8* %y to <4 x i32>*
99  store <4 x i32> %1, <4 x i32>* %2, align 4
100  ret i8* %z
101}
102
; Loads <4 x i32> (align 4) from %x - 512 bytes, stores it to %y and returns %z.
; Expected output: the offset is NOT folded; a separate `sub.w` updates r0
; before an unindexed vldrw.u32.
; NOTE(review): presumably -512 is just past the encodable immediate range of
; the word-form load - confirm against the ISA reference.
103define i8* @ldrwu32_m512(i8* %x, i8* %y) {
104; CHECK-LABEL: ldrwu32_m512:
105; CHECK:       @ %bb.0: @ %entry
106; CHECK-NEXT:    sub.w r0, r0, #512
107; CHECK-NEXT:    vldrw.u32 q0, [r0]
108; CHECK-NEXT:    vstrw.32 q0, [r1]
109; CHECK-NEXT:    bx lr
110entry:
111  %z = getelementptr inbounds i8, i8* %x, i32 -512
112  %0 = bitcast i8* %z to <4 x i32>*
113  %1 = load <4 x i32>, <4 x i32>* %0, align 4
114  %2 = bitcast i8* %y to <4 x i32>*
115  store <4 x i32> %1, <4 x i32>* %2, align 4
116  ret i8* %z
117}
118
119
; Loads <4 x i16> (align 2) from %x + 4 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: load and zext become one pre-indexed widening load,
; vldrh.u32 `[r0, #4]!`.
120define i8* @ldrhu32_4(i8* %x, i8* %y) {
121; CHECK-LABEL: ldrhu32_4:
122; CHECK:       @ %bb.0: @ %entry
123; CHECK-NEXT:    vldrh.u32 q0, [r0, #4]!
124; CHECK-NEXT:    vstrw.32 q0, [r1]
125; CHECK-NEXT:    bx lr
126entry:
127  %z = getelementptr inbounds i8, i8* %x, i32 4
128  %0 = bitcast i8* %z to <4 x i16>*
129  %1 = load <4 x i16>, <4 x i16>* %0, align 2
130  %2 = zext <4 x i16> %1 to <4 x i32>
131  %3 = bitcast i8* %y to <4 x i32>*
132  store <4 x i32> %2, <4 x i32>* %3, align 4
133  ret i8* %z
134}
135
; Loads <4 x i16> (align 2) from %x + 3 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +3 is NOT folded; `adds` updates r0 before an unindexed
; vldrh.u32.
; NOTE(review): presumably an odd offset is not encodable for the halfword
; form - confirm against the ISA reference.
136define i8* @ldrhu32_3(i8* %x, i8* %y) {
137; CHECK-LABEL: ldrhu32_3:
138; CHECK:       @ %bb.0: @ %entry
139; CHECK-NEXT:    adds r0, #3
140; CHECK-NEXT:    vldrh.u32 q0, [r0]
141; CHECK-NEXT:    vstrw.32 q0, [r1]
142; CHECK-NEXT:    bx lr
143entry:
144  %z = getelementptr inbounds i8, i8* %x, i32 3
145  %0 = bitcast i8* %z to <4 x i16>*
146  %1 = load <4 x i16>, <4 x i16>* %0, align 2
147  %2 = zext <4 x i16> %1 to <4 x i32>
148  %3 = bitcast i8* %y to <4 x i32>*
149  store <4 x i32> %2, <4 x i32>* %3, align 4
150  ret i8* %z
151}
152
; Loads <4 x i16> (align 2) from %x + 2 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +2 is folded into a pre-indexed vldrh.u32
; (`[r0, #2]!`).
153define i8* @ldrhu32_2(i8* %x, i8* %y) {
154; CHECK-LABEL: ldrhu32_2:
155; CHECK:       @ %bb.0: @ %entry
156; CHECK-NEXT:    vldrh.u32 q0, [r0, #2]!
157; CHECK-NEXT:    vstrw.32 q0, [r1]
158; CHECK-NEXT:    bx lr
159entry:
160  %z = getelementptr inbounds i8, i8* %x, i32 2
161  %0 = bitcast i8* %z to <4 x i16>*
162  %1 = load <4 x i16>, <4 x i16>* %0, align 2
163  %2 = zext <4 x i16> %1 to <4 x i32>
164  %3 = bitcast i8* %y to <4 x i32>*
165  store <4 x i32> %2, <4 x i32>* %3, align 4
166  ret i8* %z
167}
168
; Loads <4 x i16> (align 2) from %x + 254 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +254 is still folded into a pre-indexed vldrh.u32
; (`[r0, #254]!`); compare with the +256 case, which is not folded.
169define i8* @ldrhu32_254(i8* %x, i8* %y) {
170; CHECK-LABEL: ldrhu32_254:
171; CHECK:       @ %bb.0: @ %entry
172; CHECK-NEXT:    vldrh.u32 q0, [r0, #254]!
173; CHECK-NEXT:    vstrw.32 q0, [r1]
174; CHECK-NEXT:    bx lr
175entry:
176  %z = getelementptr inbounds i8, i8* %x, i32 254
177  %0 = bitcast i8* %z to <4 x i16>*
178  %1 = load <4 x i16>, <4 x i16>* %0, align 2
179  %2 = zext <4 x i16> %1 to <4 x i32>
180  %3 = bitcast i8* %y to <4 x i32>*
181  store <4 x i32> %2, <4 x i32>* %3, align 4
182  ret i8* %z
183}
184
; Loads <4 x i16> (align 2) from %x + 256 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `add.w` updates r0 before an
; unindexed vldrh.u32.
; NOTE(review): presumably 256 is just past the encodable immediate range of
; the halfword-form load - confirm against the ISA reference.
185define i8* @ldrhu32_256(i8* %x, i8* %y) {
186; CHECK-LABEL: ldrhu32_256:
187; CHECK:       @ %bb.0: @ %entry
188; CHECK-NEXT:    add.w r0, r0, #256
189; CHECK-NEXT:    vldrh.u32 q0, [r0]
190; CHECK-NEXT:    vstrw.32 q0, [r1]
191; CHECK-NEXT:    bx lr
192entry:
193  %z = getelementptr inbounds i8, i8* %x, i32 256
194  %0 = bitcast i8* %z to <4 x i16>*
195  %1 = load <4 x i16>, <4 x i16>* %0, align 2
196  %2 = zext <4 x i16> %1 to <4 x i32>
197  %3 = bitcast i8* %y to <4 x i32>*
198  store <4 x i32> %2, <4 x i32>* %3, align 4
199  ret i8* %z
200}
201
; Loads <4 x i16> (align 2) from %x - 254 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the -254 is still folded into a pre-indexed vldrh.u32
; (`[r0, #-254]!`); compare with the -256 case, which is not folded.
202define i8* @ldrhu32_m254(i8* %x, i8* %y) {
203; CHECK-LABEL: ldrhu32_m254:
204; CHECK:       @ %bb.0: @ %entry
205; CHECK-NEXT:    vldrh.u32 q0, [r0, #-254]!
206; CHECK-NEXT:    vstrw.32 q0, [r1]
207; CHECK-NEXT:    bx lr
208entry:
209  %z = getelementptr inbounds i8, i8* %x, i32 -254
210  %0 = bitcast i8* %z to <4 x i16>*
211  %1 = load <4 x i16>, <4 x i16>* %0, align 2
212  %2 = zext <4 x i16> %1 to <4 x i32>
213  %3 = bitcast i8* %y to <4 x i32>*
214  store <4 x i32> %2, <4 x i32>* %3, align 4
215  ret i8* %z
216}
217
; Loads <4 x i16> (align 2) from %x - 256 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `sub.w` updates r0 before an
; unindexed vldrh.u32.
; NOTE(review): presumably -256 is just past the encodable immediate range of
; the halfword-form load - confirm against the ISA reference.
218define i8* @ldrhu32_m256(i8* %x, i8* %y) {
219; CHECK-LABEL: ldrhu32_m256:
220; CHECK:       @ %bb.0: @ %entry
221; CHECK-NEXT:    sub.w r0, r0, #256
222; CHECK-NEXT:    vldrh.u32 q0, [r0]
223; CHECK-NEXT:    vstrw.32 q0, [r1]
224; CHECK-NEXT:    bx lr
225entry:
226  %z = getelementptr inbounds i8, i8* %x, i32 -256
227  %0 = bitcast i8* %z to <4 x i16>*
228  %1 = load <4 x i16>, <4 x i16>* %0, align 2
229  %2 = zext <4 x i16> %1 to <4 x i32>
230  %3 = bitcast i8* %y to <4 x i32>*
231  store <4 x i32> %2, <4 x i32>* %3, align 4
232  ret i8* %z
233}
234
235
; Loads <4 x i16> (align 2) from %x + 4 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: load and sext become one pre-indexed widening load,
; vldrh.s32 `[r0, #4]!`.
236define i8* @ldrhs32_4(i8* %x, i8* %y) {
237; CHECK-LABEL: ldrhs32_4:
238; CHECK:       @ %bb.0: @ %entry
239; CHECK-NEXT:    vldrh.s32 q0, [r0, #4]!
240; CHECK-NEXT:    vstrw.32 q0, [r1]
241; CHECK-NEXT:    bx lr
242entry:
243  %z = getelementptr inbounds i8, i8* %x, i32 4
244  %0 = bitcast i8* %z to <4 x i16>*
245  %1 = load <4 x i16>, <4 x i16>* %0, align 2
246  %2 = sext <4 x i16> %1 to <4 x i32>
247  %3 = bitcast i8* %y to <4 x i32>*
248  store <4 x i32> %2, <4 x i32>* %3, align 4
249  ret i8* %z
250}
251
; Loads <4 x i16> (align 2) from %x + 3 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +3 is NOT folded; `adds` updates r0 before an unindexed
; vldrh.s32.
; NOTE(review): presumably an odd offset is not encodable for the halfword
; form - confirm against the ISA reference.
252define i8* @ldrhs32_3(i8* %x, i8* %y) {
253; CHECK-LABEL: ldrhs32_3:
254; CHECK:       @ %bb.0: @ %entry
255; CHECK-NEXT:    adds r0, #3
256; CHECK-NEXT:    vldrh.s32 q0, [r0]
257; CHECK-NEXT:    vstrw.32 q0, [r1]
258; CHECK-NEXT:    bx lr
259entry:
260  %z = getelementptr inbounds i8, i8* %x, i32 3
261  %0 = bitcast i8* %z to <4 x i16>*
262  %1 = load <4 x i16>, <4 x i16>* %0, align 2
263  %2 = sext <4 x i16> %1 to <4 x i32>
264  %3 = bitcast i8* %y to <4 x i32>*
265  store <4 x i32> %2, <4 x i32>* %3, align 4
266  ret i8* %z
267}
268
; Loads <4 x i16> (align 2) from %x + 2 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +2 is folded into a pre-indexed vldrh.s32
; (`[r0, #2]!`).
269define i8* @ldrhs32_2(i8* %x, i8* %y) {
270; CHECK-LABEL: ldrhs32_2:
271; CHECK:       @ %bb.0: @ %entry
272; CHECK-NEXT:    vldrh.s32 q0, [r0, #2]!
273; CHECK-NEXT:    vstrw.32 q0, [r1]
274; CHECK-NEXT:    bx lr
275entry:
276  %z = getelementptr inbounds i8, i8* %x, i32 2
277  %0 = bitcast i8* %z to <4 x i16>*
278  %1 = load <4 x i16>, <4 x i16>* %0, align 2
279  %2 = sext <4 x i16> %1 to <4 x i32>
280  %3 = bitcast i8* %y to <4 x i32>*
281  store <4 x i32> %2, <4 x i32>* %3, align 4
282  ret i8* %z
283}
284
; Loads <4 x i16> (align 2) from %x + 254 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +254 is still folded into a pre-indexed vldrh.s32
; (`[r0, #254]!`); compare with the +256 case, which is not folded.
285define i8* @ldrhs32_254(i8* %x, i8* %y) {
286; CHECK-LABEL: ldrhs32_254:
287; CHECK:       @ %bb.0: @ %entry
288; CHECK-NEXT:    vldrh.s32 q0, [r0, #254]!
289; CHECK-NEXT:    vstrw.32 q0, [r1]
290; CHECK-NEXT:    bx lr
291entry:
292  %z = getelementptr inbounds i8, i8* %x, i32 254
293  %0 = bitcast i8* %z to <4 x i16>*
294  %1 = load <4 x i16>, <4 x i16>* %0, align 2
295  %2 = sext <4 x i16> %1 to <4 x i32>
296  %3 = bitcast i8* %y to <4 x i32>*
297  store <4 x i32> %2, <4 x i32>* %3, align 4
298  ret i8* %z
299}
300
; Loads <4 x i16> (align 2) from %x + 256 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `add.w` updates r0 before an
; unindexed vldrh.s32.
; NOTE(review): presumably 256 is just past the encodable immediate range of
; the halfword-form load - confirm against the ISA reference.
301define i8* @ldrhs32_256(i8* %x, i8* %y) {
302; CHECK-LABEL: ldrhs32_256:
303; CHECK:       @ %bb.0: @ %entry
304; CHECK-NEXT:    add.w r0, r0, #256
305; CHECK-NEXT:    vldrh.s32 q0, [r0]
306; CHECK-NEXT:    vstrw.32 q0, [r1]
307; CHECK-NEXT:    bx lr
308entry:
309  %z = getelementptr inbounds i8, i8* %x, i32 256
310  %0 = bitcast i8* %z to <4 x i16>*
311  %1 = load <4 x i16>, <4 x i16>* %0, align 2
312  %2 = sext <4 x i16> %1 to <4 x i32>
313  %3 = bitcast i8* %y to <4 x i32>*
314  store <4 x i32> %2, <4 x i32>* %3, align 4
315  ret i8* %z
316}
317
; Loads <4 x i16> (align 2) from %x - 254 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the -254 is still folded into a pre-indexed vldrh.s32
; (`[r0, #-254]!`); compare with the -256 case, which is not folded.
318define i8* @ldrhs32_m254(i8* %x, i8* %y) {
319; CHECK-LABEL: ldrhs32_m254:
320; CHECK:       @ %bb.0: @ %entry
321; CHECK-NEXT:    vldrh.s32 q0, [r0, #-254]!
322; CHECK-NEXT:    vstrw.32 q0, [r1]
323; CHECK-NEXT:    bx lr
324entry:
325  %z = getelementptr inbounds i8, i8* %x, i32 -254
326  %0 = bitcast i8* %z to <4 x i16>*
327  %1 = load <4 x i16>, <4 x i16>* %0, align 2
328  %2 = sext <4 x i16> %1 to <4 x i32>
329  %3 = bitcast i8* %y to <4 x i32>*
330  store <4 x i32> %2, <4 x i32>* %3, align 4
331  ret i8* %z
332}
333
; Loads <4 x i16> (align 2) from %x - 256 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `sub.w` updates r0 before an
; unindexed vldrh.s32.
; NOTE(review): presumably -256 is just past the encodable immediate range of
; the halfword-form load - confirm against the ISA reference.
334define i8* @ldrhs32_m256(i8* %x, i8* %y) {
335; CHECK-LABEL: ldrhs32_m256:
336; CHECK:       @ %bb.0: @ %entry
337; CHECK-NEXT:    sub.w r0, r0, #256
338; CHECK-NEXT:    vldrh.s32 q0, [r0]
339; CHECK-NEXT:    vstrw.32 q0, [r1]
340; CHECK-NEXT:    bx lr
341entry:
342  %z = getelementptr inbounds i8, i8* %x, i32 -256
343  %0 = bitcast i8* %z to <4 x i16>*
344  %1 = load <4 x i16>, <4 x i16>* %0, align 2
345  %2 = sext <4 x i16> %1 to <4 x i32>
346  %3 = bitcast i8* %y to <4 x i32>*
347  store <4 x i32> %2, <4 x i32>* %3, align 4
348  ret i8* %z
349}
350
351
; Loads <8 x i16> (align 2) from %x + 4 bytes, stores it to %y and returns %z.
; Expected output: the +4 is folded into a pre-indexed vldrh.u16
; (`[r0, #4]!`).
352define i8* @ldrhu16_4(i8* %x, i8* %y) {
353; CHECK-LABEL: ldrhu16_4:
354; CHECK:       @ %bb.0: @ %entry
355; CHECK-NEXT:    vldrh.u16 q0, [r0, #4]!
356; CHECK-NEXT:    vstrh.16 q0, [r1]
357; CHECK-NEXT:    bx lr
358entry:
359  %z = getelementptr inbounds i8, i8* %x, i32 4
360  %0 = bitcast i8* %z to <8 x i16>*
361  %1 = load <8 x i16>, <8 x i16>* %0, align 2
362  %2 = bitcast i8* %y to <8 x i16>*
363  store <8 x i16> %1, <8 x i16>* %2, align 2
364  ret i8* %z
365}
366
; Loads <8 x i16> (align 2) from %x + 3 bytes, stores it to %y and returns %z.
; Expected output differs per run: the little-endian run folds the +3 into a
; pre-indexed byte load (vldrb.u8 `[r0, #3]!`), while the big-endian run uses
; a separate `adds` followed by an unindexed vldrh.u16.
; NOTE(review): presumably an odd offset is not encodable for the halfword
; form, and only little-endian may substitute the byte form - confirm.
367define i8* @ldrhu16_3(i8* %x, i8* %y) {
368; CHECK-LE-LABEL: ldrhu16_3:
369; CHECK-LE:       @ %bb.0: @ %entry
370; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
371; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
372; CHECK-LE-NEXT:    bx lr
373;
374; CHECK-BE-LABEL: ldrhu16_3:
375; CHECK-BE:       @ %bb.0: @ %entry
376; CHECK-BE-NEXT:    adds r0, #3
377; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
378; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
379; CHECK-BE-NEXT:    bx lr
380entry:
381  %z = getelementptr inbounds i8, i8* %x, i32 3
382  %0 = bitcast i8* %z to <8 x i16>*
383  %1 = load <8 x i16>, <8 x i16>* %0, align 2
384  %2 = bitcast i8* %y to <8 x i16>*
385  store <8 x i16> %1, <8 x i16>* %2, align 2
386  ret i8* %z
387}
388
; Loads <8 x i16> (align 2) from %x + 2 bytes, stores it to %y and returns %z.
; Expected output: the +2 is folded into a pre-indexed vldrh.u16
; (`[r0, #2]!`).
389define i8* @ldrhu16_2(i8* %x, i8* %y) {
390; CHECK-LABEL: ldrhu16_2:
391; CHECK:       @ %bb.0: @ %entry
392; CHECK-NEXT:    vldrh.u16 q0, [r0, #2]!
393; CHECK-NEXT:    vstrh.16 q0, [r1]
394; CHECK-NEXT:    bx lr
395entry:
396  %z = getelementptr inbounds i8, i8* %x, i32 2
397  %0 = bitcast i8* %z to <8 x i16>*
398  %1 = load <8 x i16>, <8 x i16>* %0, align 2
399  %2 = bitcast i8* %y to <8 x i16>*
400  store <8 x i16> %1, <8 x i16>* %2, align 2
401  ret i8* %z
402}
403
; Loads <8 x i16> (align 2) from %x + 254 bytes, stores it to %y and returns %z.
; Expected output: the +254 is still folded into a pre-indexed vldrh.u16
; (`[r0, #254]!`); compare with the +256 case, which is not folded.
404define i8* @ldrhu16_254(i8* %x, i8* %y) {
405; CHECK-LABEL: ldrhu16_254:
406; CHECK:       @ %bb.0: @ %entry
407; CHECK-NEXT:    vldrh.u16 q0, [r0, #254]!
408; CHECK-NEXT:    vstrh.16 q0, [r1]
409; CHECK-NEXT:    bx lr
410entry:
411  %z = getelementptr inbounds i8, i8* %x, i32 254
412  %0 = bitcast i8* %z to <8 x i16>*
413  %1 = load <8 x i16>, <8 x i16>* %0, align 2
414  %2 = bitcast i8* %y to <8 x i16>*
415  store <8 x i16> %1, <8 x i16>* %2, align 2
416  ret i8* %z
417}
418
; Loads <8 x i16> (align 2) from %x + 256 bytes, stores it to %y and returns %z.
; Expected output: the offset is NOT folded; `add.w` updates r0 before an
; unindexed vldrh.u16.
; NOTE(review): presumably 256 is just past the encodable immediate range of
; the halfword-form load - confirm against the ISA reference.
419define i8* @ldrhu16_256(i8* %x, i8* %y) {
420; CHECK-LABEL: ldrhu16_256:
421; CHECK:       @ %bb.0: @ %entry
422; CHECK-NEXT:    add.w r0, r0, #256
423; CHECK-NEXT:    vldrh.u16 q0, [r0]
424; CHECK-NEXT:    vstrh.16 q0, [r1]
425; CHECK-NEXT:    bx lr
426entry:
427  %z = getelementptr inbounds i8, i8* %x, i32 256
428  %0 = bitcast i8* %z to <8 x i16>*
429  %1 = load <8 x i16>, <8 x i16>* %0, align 2
430  %2 = bitcast i8* %y to <8 x i16>*
431  store <8 x i16> %1, <8 x i16>* %2, align 2
432  ret i8* %z
433}
434
; Loads <8 x i16> (align 2) from %x - 254 bytes, stores it to %y and returns %z.
; Expected output: the -254 is still folded into a pre-indexed vldrh.u16
; (`[r0, #-254]!`); compare with the -256 case, which is not folded.
435define i8* @ldrhu16_m254(i8* %x, i8* %y) {
436; CHECK-LABEL: ldrhu16_m254:
437; CHECK:       @ %bb.0: @ %entry
438; CHECK-NEXT:    vldrh.u16 q0, [r0, #-254]!
439; CHECK-NEXT:    vstrh.16 q0, [r1]
440; CHECK-NEXT:    bx lr
441entry:
442  %z = getelementptr inbounds i8, i8* %x, i32 -254
443  %0 = bitcast i8* %z to <8 x i16>*
444  %1 = load <8 x i16>, <8 x i16>* %0, align 2
445  %2 = bitcast i8* %y to <8 x i16>*
446  store <8 x i16> %1, <8 x i16>* %2, align 2
447  ret i8* %z
448}
449
; Loads <8 x i16> (align 2) from %x - 256 bytes, stores it to %y and returns %z.
; Expected output: the offset is NOT folded; `sub.w` updates r0 before an
; unindexed vldrh.u16.
; NOTE(review): presumably -256 is just past the encodable immediate range of
; the halfword-form load - confirm against the ISA reference.
450define i8* @ldrhu16_m256(i8* %x, i8* %y) {
451; CHECK-LABEL: ldrhu16_m256:
452; CHECK:       @ %bb.0: @ %entry
453; CHECK-NEXT:    sub.w r0, r0, #256
454; CHECK-NEXT:    vldrh.u16 q0, [r0]
455; CHECK-NEXT:    vstrh.16 q0, [r1]
456; CHECK-NEXT:    bx lr
457entry:
458  %z = getelementptr inbounds i8, i8* %x, i32 -256
459  %0 = bitcast i8* %z to <8 x i16>*
460  %1 = load <8 x i16>, <8 x i16>* %0, align 2
461  %2 = bitcast i8* %y to <8 x i16>*
462  store <8 x i16> %1, <8 x i16>* %2, align 2
463  ret i8* %z
464}
465
466
; Loads <4 x i8> (align 1) from %x + 4 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: load and zext become one pre-indexed widening load,
; vldrb.u32 `[r0, #4]!`.
467define i8* @ldrbu32_4(i8* %x, i8* %y) {
468; CHECK-LABEL: ldrbu32_4:
469; CHECK:       @ %bb.0: @ %entry
470; CHECK-NEXT:    vldrb.u32 q0, [r0, #4]!
471; CHECK-NEXT:    vstrw.32 q0, [r1]
472; CHECK-NEXT:    bx lr
473entry:
474  %z = getelementptr inbounds i8, i8* %x, i32 4
475  %0 = bitcast i8* %z to <4 x i8>*
476  %1 = load <4 x i8>, <4 x i8>* %0, align 1
477  %2 = zext <4 x i8> %1 to <4 x i32>
478  %3 = bitcast i8* %y to <4 x i32>*
479  store <4 x i32> %2, <4 x i32>* %3, align 4
480  ret i8* %z
481}
482
; Loads <4 x i8> (align 1) from %x + 3 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the odd +3 offset is folded into a pre-indexed vldrb.u32
; (`[r0, #3]!`).
483define i8* @ldrbu32_3(i8* %x, i8* %y) {
484; CHECK-LABEL: ldrbu32_3:
485; CHECK:       @ %bb.0: @ %entry
486; CHECK-NEXT:    vldrb.u32 q0, [r0, #3]!
487; CHECK-NEXT:    vstrw.32 q0, [r1]
488; CHECK-NEXT:    bx lr
489entry:
490  %z = getelementptr inbounds i8, i8* %x, i32 3
491  %0 = bitcast i8* %z to <4 x i8>*
492  %1 = load <4 x i8>, <4 x i8>* %0, align 1
493  %2 = zext <4 x i8> %1 to <4 x i32>
494  %3 = bitcast i8* %y to <4 x i32>*
495  store <4 x i32> %2, <4 x i32>* %3, align 4
496  ret i8* %z
497}
498
; Loads <4 x i8> (align 1) from %x + 127 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +127 is still folded into a pre-indexed vldrb.u32
; (`[r0, #127]!`); compare with the +128 case, which is not folded.
499define i8* @ldrbu32_127(i8* %x, i8* %y) {
500; CHECK-LABEL: ldrbu32_127:
501; CHECK:       @ %bb.0: @ %entry
502; CHECK-NEXT:    vldrb.u32 q0, [r0, #127]!
503; CHECK-NEXT:    vstrw.32 q0, [r1]
504; CHECK-NEXT:    bx lr
505entry:
506  %z = getelementptr inbounds i8, i8* %x, i32 127
507  %0 = bitcast i8* %z to <4 x i8>*
508  %1 = load <4 x i8>, <4 x i8>* %0, align 1
509  %2 = zext <4 x i8> %1 to <4 x i32>
510  %3 = bitcast i8* %y to <4 x i32>*
511  store <4 x i32> %2, <4 x i32>* %3, align 4
512  ret i8* %z
513}
514
; Loads <4 x i8> (align 1) from %x + 128 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `adds` updates r0 before an
; unindexed vldrb.u32.
; NOTE(review): presumably 128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
515define i8* @ldrbu32_128(i8* %x, i8* %y) {
516; CHECK-LABEL: ldrbu32_128:
517; CHECK:       @ %bb.0: @ %entry
518; CHECK-NEXT:    adds r0, #128
519; CHECK-NEXT:    vldrb.u32 q0, [r0]
520; CHECK-NEXT:    vstrw.32 q0, [r1]
521; CHECK-NEXT:    bx lr
522entry:
523  %z = getelementptr inbounds i8, i8* %x, i32 128
524  %0 = bitcast i8* %z to <4 x i8>*
525  %1 = load <4 x i8>, <4 x i8>* %0, align 1
526  %2 = zext <4 x i8> %1 to <4 x i32>
527  %3 = bitcast i8* %y to <4 x i32>*
528  store <4 x i32> %2, <4 x i32>* %3, align 4
529  ret i8* %z
530}
531
; Loads <4 x i8> (align 1) from %x - 127 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the -127 is still folded into a pre-indexed vldrb.u32
; (`[r0, #-127]!`); compare with the -128 case, which is not folded.
532define i8* @ldrbu32_m127(i8* %x, i8* %y) {
533; CHECK-LABEL: ldrbu32_m127:
534; CHECK:       @ %bb.0: @ %entry
535; CHECK-NEXT:    vldrb.u32 q0, [r0, #-127]!
536; CHECK-NEXT:    vstrw.32 q0, [r1]
537; CHECK-NEXT:    bx lr
538entry:
539  %z = getelementptr inbounds i8, i8* %x, i32 -127
540  %0 = bitcast i8* %z to <4 x i8>*
541  %1 = load <4 x i8>, <4 x i8>* %0, align 1
542  %2 = zext <4 x i8> %1 to <4 x i32>
543  %3 = bitcast i8* %y to <4 x i32>*
544  store <4 x i32> %2, <4 x i32>* %3, align 4
545  ret i8* %z
546}
547
; Loads <4 x i8> (align 1) from %x - 128 bytes, zero-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `subs` updates r0 before an
; unindexed vldrb.u32.
; NOTE(review): presumably -128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
548define i8* @ldrbu32_m128(i8* %x, i8* %y) {
549; CHECK-LABEL: ldrbu32_m128:
550; CHECK:       @ %bb.0: @ %entry
551; CHECK-NEXT:    subs r0, #128
552; CHECK-NEXT:    vldrb.u32 q0, [r0]
553; CHECK-NEXT:    vstrw.32 q0, [r1]
554; CHECK-NEXT:    bx lr
555entry:
556  %z = getelementptr inbounds i8, i8* %x, i32 -128
557  %0 = bitcast i8* %z to <4 x i8>*
558  %1 = load <4 x i8>, <4 x i8>* %0, align 1
559  %2 = zext <4 x i8> %1 to <4 x i32>
560  %3 = bitcast i8* %y to <4 x i32>*
561  store <4 x i32> %2, <4 x i32>* %3, align 4
562  ret i8* %z
563}
564
565
; Loads <4 x i8> (align 1) from %x + 4 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: load and sext become one pre-indexed widening load,
; vldrb.s32 `[r0, #4]!`.
566define i8* @ldrbs32_4(i8* %x, i8* %y) {
567; CHECK-LABEL: ldrbs32_4:
568; CHECK:       @ %bb.0: @ %entry
569; CHECK-NEXT:    vldrb.s32 q0, [r0, #4]!
570; CHECK-NEXT:    vstrw.32 q0, [r1]
571; CHECK-NEXT:    bx lr
572entry:
573  %z = getelementptr inbounds i8, i8* %x, i32 4
574  %0 = bitcast i8* %z to <4 x i8>*
575  %1 = load <4 x i8>, <4 x i8>* %0, align 1
576  %2 = sext <4 x i8> %1 to <4 x i32>
577  %3 = bitcast i8* %y to <4 x i32>*
578  store <4 x i32> %2, <4 x i32>* %3, align 4
579  ret i8* %z
580}
581
; Loads <4 x i8> (align 1) from %x + 3 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the odd +3 offset is folded into a pre-indexed vldrb.s32
; (`[r0, #3]!`).
582define i8* @ldrbs32_3(i8* %x, i8* %y) {
583; CHECK-LABEL: ldrbs32_3:
584; CHECK:       @ %bb.0: @ %entry
585; CHECK-NEXT:    vldrb.s32 q0, [r0, #3]!
586; CHECK-NEXT:    vstrw.32 q0, [r1]
587; CHECK-NEXT:    bx lr
588entry:
589  %z = getelementptr inbounds i8, i8* %x, i32 3
590  %0 = bitcast i8* %z to <4 x i8>*
591  %1 = load <4 x i8>, <4 x i8>* %0, align 1
592  %2 = sext <4 x i8> %1 to <4 x i32>
593  %3 = bitcast i8* %y to <4 x i32>*
594  store <4 x i32> %2, <4 x i32>* %3, align 4
595  ret i8* %z
596}
597
; Loads <4 x i8> (align 1) from %x + 127 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the +127 is still folded into a pre-indexed vldrb.s32
; (`[r0, #127]!`); compare with the +128 case, which is not folded.
598define i8* @ldrbs32_127(i8* %x, i8* %y) {
599; CHECK-LABEL: ldrbs32_127:
600; CHECK:       @ %bb.0: @ %entry
601; CHECK-NEXT:    vldrb.s32 q0, [r0, #127]!
602; CHECK-NEXT:    vstrw.32 q0, [r1]
603; CHECK-NEXT:    bx lr
604entry:
605  %z = getelementptr inbounds i8, i8* %x, i32 127
606  %0 = bitcast i8* %z to <4 x i8>*
607  %1 = load <4 x i8>, <4 x i8>* %0, align 1
608  %2 = sext <4 x i8> %1 to <4 x i32>
609  %3 = bitcast i8* %y to <4 x i32>*
610  store <4 x i32> %2, <4 x i32>* %3, align 4
611  ret i8* %z
612}
613
; Loads <4 x i8> (align 1) from %x + 128 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `adds` updates r0 before an
; unindexed vldrb.s32.
; NOTE(review): presumably 128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
614define i8* @ldrbs32_128(i8* %x, i8* %y) {
615; CHECK-LABEL: ldrbs32_128:
616; CHECK:       @ %bb.0: @ %entry
617; CHECK-NEXT:    adds r0, #128
618; CHECK-NEXT:    vldrb.s32 q0, [r0]
619; CHECK-NEXT:    vstrw.32 q0, [r1]
620; CHECK-NEXT:    bx lr
621entry:
622  %z = getelementptr inbounds i8, i8* %x, i32 128
623  %0 = bitcast i8* %z to <4 x i8>*
624  %1 = load <4 x i8>, <4 x i8>* %0, align 1
625  %2 = sext <4 x i8> %1 to <4 x i32>
626  %3 = bitcast i8* %y to <4 x i32>*
627  store <4 x i32> %2, <4 x i32>* %3, align 4
628  ret i8* %z
629}
630
; Loads <4 x i8> (align 1) from %x - 127 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the -127 is still folded into a pre-indexed vldrb.s32
; (`[r0, #-127]!`); compare with the -128 case, which is not folded.
631define i8* @ldrbs32_m127(i8* %x, i8* %y) {
632; CHECK-LABEL: ldrbs32_m127:
633; CHECK:       @ %bb.0: @ %entry
634; CHECK-NEXT:    vldrb.s32 q0, [r0, #-127]!
635; CHECK-NEXT:    vstrw.32 q0, [r1]
636; CHECK-NEXT:    bx lr
637entry:
638  %z = getelementptr inbounds i8, i8* %x, i32 -127
639  %0 = bitcast i8* %z to <4 x i8>*
640  %1 = load <4 x i8>, <4 x i8>* %0, align 1
641  %2 = sext <4 x i8> %1 to <4 x i32>
642  %3 = bitcast i8* %y to <4 x i32>*
643  store <4 x i32> %2, <4 x i32>* %3, align 4
644  ret i8* %z
645}
646
; Loads <4 x i8> (align 1) from %x - 128 bytes, sign-extends it to <4 x i32>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `subs` updates r0 before an
; unindexed vldrb.s32.
; NOTE(review): presumably -128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
647define i8* @ldrbs32_m128(i8* %x, i8* %y) {
648; CHECK-LABEL: ldrbs32_m128:
649; CHECK:       @ %bb.0: @ %entry
650; CHECK-NEXT:    subs r0, #128
651; CHECK-NEXT:    vldrb.s32 q0, [r0]
652; CHECK-NEXT:    vstrw.32 q0, [r1]
653; CHECK-NEXT:    bx lr
654entry:
655  %z = getelementptr inbounds i8, i8* %x, i32 -128
656  %0 = bitcast i8* %z to <4 x i8>*
657  %1 = load <4 x i8>, <4 x i8>* %0, align 1
658  %2 = sext <4 x i8> %1 to <4 x i32>
659  %3 = bitcast i8* %y to <4 x i32>*
660  store <4 x i32> %2, <4 x i32>* %3, align 4
661  ret i8* %z
662}
663
664
; Loads <8 x i8> (align 1) from %x + 4 bytes, zero-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: load and zext become one pre-indexed widening load,
; vldrb.u16 `[r0, #4]!`.
665define i8* @ldrbu16_4(i8* %x, i8* %y) {
666; CHECK-LABEL: ldrbu16_4:
667; CHECK:       @ %bb.0: @ %entry
668; CHECK-NEXT:    vldrb.u16 q0, [r0, #4]!
669; CHECK-NEXT:    vstrh.16 q0, [r1]
670; CHECK-NEXT:    bx lr
671entry:
672  %z = getelementptr inbounds i8, i8* %x, i32 4
673  %0 = bitcast i8* %z to <8 x i8>*
674  %1 = load <8 x i8>, <8 x i8>* %0, align 1
675  %2 = zext <8 x i8> %1 to <8 x i16>
676  %3 = bitcast i8* %y to <8 x i16>*
677  store <8 x i16> %2, <8 x i16>* %3, align 2
678  ret i8* %z
679}
680
; Loads <8 x i8> (align 1) from %x + 3 bytes, zero-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the odd +3 offset is folded into a pre-indexed vldrb.u16
; (`[r0, #3]!`).
681define i8* @ldrbu16_3(i8* %x, i8* %y) {
682; CHECK-LABEL: ldrbu16_3:
683; CHECK:       @ %bb.0: @ %entry
684; CHECK-NEXT:    vldrb.u16 q0, [r0, #3]!
685; CHECK-NEXT:    vstrh.16 q0, [r1]
686; CHECK-NEXT:    bx lr
687entry:
688  %z = getelementptr inbounds i8, i8* %x, i32 3
689  %0 = bitcast i8* %z to <8 x i8>*
690  %1 = load <8 x i8>, <8 x i8>* %0, align 1
691  %2 = zext <8 x i8> %1 to <8 x i16>
692  %3 = bitcast i8* %y to <8 x i16>*
693  store <8 x i16> %2, <8 x i16>* %3, align 2
694  ret i8* %z
695}
696
; Loads <8 x i8> (align 1) from %x + 127 bytes, zero-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the +127 is still folded into a pre-indexed vldrb.u16
; (`[r0, #127]!`); compare with the +128 case, which is not folded.
697define i8* @ldrbu16_127(i8* %x, i8* %y) {
698; CHECK-LABEL: ldrbu16_127:
699; CHECK:       @ %bb.0: @ %entry
700; CHECK-NEXT:    vldrb.u16 q0, [r0, #127]!
701; CHECK-NEXT:    vstrh.16 q0, [r1]
702; CHECK-NEXT:    bx lr
703entry:
704  %z = getelementptr inbounds i8, i8* %x, i32 127
705  %0 = bitcast i8* %z to <8 x i8>*
706  %1 = load <8 x i8>, <8 x i8>* %0, align 1
707  %2 = zext <8 x i8> %1 to <8 x i16>
708  %3 = bitcast i8* %y to <8 x i16>*
709  store <8 x i16> %2, <8 x i16>* %3, align 2
710  ret i8* %z
711}
712
; Loads <8 x i8> (align 1) from %x + 128 bytes, zero-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `adds` updates r0 before an
; unindexed vldrb.u16.
; NOTE(review): presumably 128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
713define i8* @ldrbu16_128(i8* %x, i8* %y) {
714; CHECK-LABEL: ldrbu16_128:
715; CHECK:       @ %bb.0: @ %entry
716; CHECK-NEXT:    adds r0, #128
717; CHECK-NEXT:    vldrb.u16 q0, [r0]
718; CHECK-NEXT:    vstrh.16 q0, [r1]
719; CHECK-NEXT:    bx lr
720entry:
721  %z = getelementptr inbounds i8, i8* %x, i32 128
722  %0 = bitcast i8* %z to <8 x i8>*
723  %1 = load <8 x i8>, <8 x i8>* %0, align 1
724  %2 = zext <8 x i8> %1 to <8 x i16>
725  %3 = bitcast i8* %y to <8 x i16>*
726  store <8 x i16> %2, <8 x i16>* %3, align 2
727  ret i8* %z
728}
729
; Loads <8 x i8> (align 1) from %x - 127 bytes, zero-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the -127 is still folded into a pre-indexed vldrb.u16
; (`[r0, #-127]!`); compare with the -128 case, which is not folded.
730define i8* @ldrbu16_m127(i8* %x, i8* %y) {
731; CHECK-LABEL: ldrbu16_m127:
732; CHECK:       @ %bb.0: @ %entry
733; CHECK-NEXT:    vldrb.u16 q0, [r0, #-127]!
734; CHECK-NEXT:    vstrh.16 q0, [r1]
735; CHECK-NEXT:    bx lr
736entry:
737  %z = getelementptr inbounds i8, i8* %x, i32 -127
738  %0 = bitcast i8* %z to <8 x i8>*
739  %1 = load <8 x i8>, <8 x i8>* %0, align 1
740  %2 = zext <8 x i8> %1 to <8 x i16>
741  %3 = bitcast i8* %y to <8 x i16>*
742  store <8 x i16> %2, <8 x i16>* %3, align 2
743  ret i8* %z
744}
745
; Loads <8 x i8> (align 1) from %x - 128 bytes, zero-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `subs` updates r0 before an
; unindexed vldrb.u16.
; NOTE(review): presumably -128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
746define i8* @ldrbu16_m128(i8* %x, i8* %y) {
747; CHECK-LABEL: ldrbu16_m128:
748; CHECK:       @ %bb.0: @ %entry
749; CHECK-NEXT:    subs r0, #128
750; CHECK-NEXT:    vldrb.u16 q0, [r0]
751; CHECK-NEXT:    vstrh.16 q0, [r1]
752; CHECK-NEXT:    bx lr
753entry:
754  %z = getelementptr inbounds i8, i8* %x, i32 -128
755  %0 = bitcast i8* %z to <8 x i8>*
756  %1 = load <8 x i8>, <8 x i8>* %0, align 1
757  %2 = zext <8 x i8> %1 to <8 x i16>
758  %3 = bitcast i8* %y to <8 x i16>*
759  store <8 x i16> %2, <8 x i16>* %3, align 2
760  ret i8* %z
761}
762
763
; Loads <8 x i8> (align 1) from %x + 4 bytes, sign-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: load and sext become one pre-indexed widening load,
; vldrb.s16 `[r0, #4]!`.
764define i8* @ldrbs16_4(i8* %x, i8* %y) {
765; CHECK-LABEL: ldrbs16_4:
766; CHECK:       @ %bb.0: @ %entry
767; CHECK-NEXT:    vldrb.s16 q0, [r0, #4]!
768; CHECK-NEXT:    vstrh.16 q0, [r1]
769; CHECK-NEXT:    bx lr
770entry:
771  %z = getelementptr inbounds i8, i8* %x, i32 4
772  %0 = bitcast i8* %z to <8 x i8>*
773  %1 = load <8 x i8>, <8 x i8>* %0, align 1
774  %2 = sext <8 x i8> %1 to <8 x i16>
775  %3 = bitcast i8* %y to <8 x i16>*
776  store <8 x i16> %2, <8 x i16>* %3, align 2
777  ret i8* %z
778}
779
; Loads <8 x i8> (align 1) from %x + 3 bytes, sign-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the odd +3 offset is folded into a pre-indexed vldrb.s16
; (`[r0, #3]!`).
780define i8* @ldrbs16_3(i8* %x, i8* %y) {
781; CHECK-LABEL: ldrbs16_3:
782; CHECK:       @ %bb.0: @ %entry
783; CHECK-NEXT:    vldrb.s16 q0, [r0, #3]!
784; CHECK-NEXT:    vstrh.16 q0, [r1]
785; CHECK-NEXT:    bx lr
786entry:
787  %z = getelementptr inbounds i8, i8* %x, i32 3
788  %0 = bitcast i8* %z to <8 x i8>*
789  %1 = load <8 x i8>, <8 x i8>* %0, align 1
790  %2 = sext <8 x i8> %1 to <8 x i16>
791  %3 = bitcast i8* %y to <8 x i16>*
792  store <8 x i16> %2, <8 x i16>* %3, align 2
793  ret i8* %z
794}
795
; Loads <8 x i8> (align 1) from %x + 127 bytes, sign-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the +127 is still folded into a pre-indexed vldrb.s16
; (`[r0, #127]!`); compare with the +128 case, which is not folded.
796define i8* @ldrbs16_127(i8* %x, i8* %y) {
797; CHECK-LABEL: ldrbs16_127:
798; CHECK:       @ %bb.0: @ %entry
799; CHECK-NEXT:    vldrb.s16 q0, [r0, #127]!
800; CHECK-NEXT:    vstrh.16 q0, [r1]
801; CHECK-NEXT:    bx lr
802entry:
803  %z = getelementptr inbounds i8, i8* %x, i32 127
804  %0 = bitcast i8* %z to <8 x i8>*
805  %1 = load <8 x i8>, <8 x i8>* %0, align 1
806  %2 = sext <8 x i8> %1 to <8 x i16>
807  %3 = bitcast i8* %y to <8 x i16>*
808  store <8 x i16> %2, <8 x i16>* %3, align 2
809  ret i8* %z
810}
811
; Loads <8 x i8> (align 1) from %x + 128 bytes, sign-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `adds` updates r0 before an
; unindexed vldrb.s16.
; NOTE(review): presumably 128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
812define i8* @ldrbs16_128(i8* %x, i8* %y) {
813; CHECK-LABEL: ldrbs16_128:
814; CHECK:       @ %bb.0: @ %entry
815; CHECK-NEXT:    adds r0, #128
816; CHECK-NEXT:    vldrb.s16 q0, [r0]
817; CHECK-NEXT:    vstrh.16 q0, [r1]
818; CHECK-NEXT:    bx lr
819entry:
820  %z = getelementptr inbounds i8, i8* %x, i32 128
821  %0 = bitcast i8* %z to <8 x i8>*
822  %1 = load <8 x i8>, <8 x i8>* %0, align 1
823  %2 = sext <8 x i8> %1 to <8 x i16>
824  %3 = bitcast i8* %y to <8 x i16>*
825  store <8 x i16> %2, <8 x i16>* %3, align 2
826  ret i8* %z
827}
828
; Loads <8 x i8> (align 1) from %x - 127 bytes, sign-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the -127 is still folded into a pre-indexed vldrb.s16
; (`[r0, #-127]!`); compare with the -128 case, which is not folded.
829define i8* @ldrbs16_m127(i8* %x, i8* %y) {
830; CHECK-LABEL: ldrbs16_m127:
831; CHECK:       @ %bb.0: @ %entry
832; CHECK-NEXT:    vldrb.s16 q0, [r0, #-127]!
833; CHECK-NEXT:    vstrh.16 q0, [r1]
834; CHECK-NEXT:    bx lr
835entry:
836  %z = getelementptr inbounds i8, i8* %x, i32 -127
837  %0 = bitcast i8* %z to <8 x i8>*
838  %1 = load <8 x i8>, <8 x i8>* %0, align 1
839  %2 = sext <8 x i8> %1 to <8 x i16>
840  %3 = bitcast i8* %y to <8 x i16>*
841  store <8 x i16> %2, <8 x i16>* %3, align 2
842  ret i8* %z
843}
844
; Loads <8 x i8> (align 1) from %x - 128 bytes, sign-extends it to <8 x i16>,
; stores the result to %y and returns %z.
; Expected output: the offset is NOT folded; `subs` updates r0 before an
; unindexed vldrb.s16.
; NOTE(review): presumably -128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
845define i8* @ldrbs16_m128(i8* %x, i8* %y) {
846; CHECK-LABEL: ldrbs16_m128:
847; CHECK:       @ %bb.0: @ %entry
848; CHECK-NEXT:    subs r0, #128
849; CHECK-NEXT:    vldrb.s16 q0, [r0]
850; CHECK-NEXT:    vstrh.16 q0, [r1]
851; CHECK-NEXT:    bx lr
852entry:
853  %z = getelementptr inbounds i8, i8* %x, i32 -128
854  %0 = bitcast i8* %z to <8 x i8>*
855  %1 = load <8 x i8>, <8 x i8>* %0, align 1
856  %2 = sext <8 x i8> %1 to <8 x i16>
857  %3 = bitcast i8* %y to <8 x i16>*
858  store <8 x i16> %2, <8 x i16>* %3, align 2
859  ret i8* %z
860}
861
862
; Loads <16 x i8> (align 1) from %x + 4 bytes, stores it to %y and returns %z.
; Expected output: the +4 is folded into a pre-indexed vldrb.u8
; (`[r0, #4]!`).
863define i8* @ldrbu8_4(i8* %x, i8* %y) {
864; CHECK-LABEL: ldrbu8_4:
865; CHECK:       @ %bb.0: @ %entry
866; CHECK-NEXT:    vldrb.u8 q0, [r0, #4]!
867; CHECK-NEXT:    vstrb.8 q0, [r1]
868; CHECK-NEXT:    bx lr
869entry:
870  %z = getelementptr inbounds i8, i8* %x, i32 4
871  %0 = bitcast i8* %z to <16 x i8>*
872  %1 = load <16 x i8>, <16 x i8>* %0, align 1
873  %2 = bitcast i8* %y to <16 x i8>*
874  store <16 x i8> %1, <16 x i8>* %2, align 1
875  ret i8* %z
876}
877
; Loads <16 x i8> (align 1) from %x + 3 bytes, stores it to %y and returns %z.
; Expected output: the odd +3 offset is folded into a pre-indexed vldrb.u8
; (`[r0, #3]!`).
878define i8* @ldrbu8_3(i8* %x, i8* %y) {
879; CHECK-LABEL: ldrbu8_3:
880; CHECK:       @ %bb.0: @ %entry
881; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
882; CHECK-NEXT:    vstrb.8 q0, [r1]
883; CHECK-NEXT:    bx lr
884entry:
885  %z = getelementptr inbounds i8, i8* %x, i32 3
886  %0 = bitcast i8* %z to <16 x i8>*
887  %1 = load <16 x i8>, <16 x i8>* %0, align 1
888  %2 = bitcast i8* %y to <16 x i8>*
889  store <16 x i8> %1, <16 x i8>* %2, align 1
890  ret i8* %z
891}
892
; Loads <16 x i8> (align 1) from %x + 127 bytes, stores it to %y and returns %z.
; Expected output: the +127 is still folded into a pre-indexed vldrb.u8
; (`[r0, #127]!`); compare with the +128 case, which is not folded.
893define i8* @ldrbu8_127(i8* %x, i8* %y) {
894; CHECK-LABEL: ldrbu8_127:
895; CHECK:       @ %bb.0: @ %entry
896; CHECK-NEXT:    vldrb.u8 q0, [r0, #127]!
897; CHECK-NEXT:    vstrb.8 q0, [r1]
898; CHECK-NEXT:    bx lr
899entry:
900  %z = getelementptr inbounds i8, i8* %x, i32 127
901  %0 = bitcast i8* %z to <16 x i8>*
902  %1 = load <16 x i8>, <16 x i8>* %0, align 1
903  %2 = bitcast i8* %y to <16 x i8>*
904  store <16 x i8> %1, <16 x i8>* %2, align 1
905  ret i8* %z
906}
907
; Loads <16 x i8> (align 1) from %x + 128 bytes, stores it to %y and returns %z.
; Expected output: the offset is NOT folded; `adds` updates r0 before an
; unindexed vldrb.u8.
; NOTE(review): presumably 128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
908define i8* @ldrbu8_128(i8* %x, i8* %y) {
909; CHECK-LABEL: ldrbu8_128:
910; CHECK:       @ %bb.0: @ %entry
911; CHECK-NEXT:    adds r0, #128
912; CHECK-NEXT:    vldrb.u8 q0, [r0]
913; CHECK-NEXT:    vstrb.8 q0, [r1]
914; CHECK-NEXT:    bx lr
915entry:
916  %z = getelementptr inbounds i8, i8* %x, i32 128
917  %0 = bitcast i8* %z to <16 x i8>*
918  %1 = load <16 x i8>, <16 x i8>* %0, align 1
919  %2 = bitcast i8* %y to <16 x i8>*
920  store <16 x i8> %1, <16 x i8>* %2, align 1
921  ret i8* %z
922}
923
; Loads <16 x i8> (align 1) from %x - 127 bytes, stores it to %y and returns %z.
; Expected output: the -127 is still folded into a pre-indexed vldrb.u8
; (`[r0, #-127]!`); compare with the -128 case, which is not folded.
924define i8* @ldrbu8_m127(i8* %x, i8* %y) {
925; CHECK-LABEL: ldrbu8_m127:
926; CHECK:       @ %bb.0: @ %entry
927; CHECK-NEXT:    vldrb.u8 q0, [r0, #-127]!
928; CHECK-NEXT:    vstrb.8 q0, [r1]
929; CHECK-NEXT:    bx lr
930entry:
931  %z = getelementptr inbounds i8, i8* %x, i32 -127
932  %0 = bitcast i8* %z to <16 x i8>*
933  %1 = load <16 x i8>, <16 x i8>* %0, align 1
934  %2 = bitcast i8* %y to <16 x i8>*
935  store <16 x i8> %1, <16 x i8>* %2, align 1
936  ret i8* %z
937}
938
; Loads <16 x i8> (align 1) from %x - 128 bytes, stores it to %y and returns %z.
; Expected output: the offset is NOT folded; `subs` updates r0 before an
; unindexed vldrb.u8.
; NOTE(review): presumably -128 is just past the encodable immediate range of
; the byte-form load - confirm against the ISA reference.
939define i8* @ldrbu8_m128(i8* %x, i8* %y) {
940; CHECK-LABEL: ldrbu8_m128:
941; CHECK:       @ %bb.0: @ %entry
942; CHECK-NEXT:    subs r0, #128
943; CHECK-NEXT:    vldrb.u8 q0, [r0]
944; CHECK-NEXT:    vstrb.8 q0, [r1]
945; CHECK-NEXT:    bx lr
946entry:
947  %z = getelementptr inbounds i8, i8* %x, i32 -128
948  %0 = bitcast i8* %z to <16 x i8>*
949  %1 = load <16 x i8>, <16 x i8>* %0, align 1
950  %2 = bitcast i8* %y to <16 x i8>*
951  store <16 x i8> %1, <16 x i8>* %2, align 1
952  ret i8* %z
953}
954
955
; Floating-point variant: loads <4 x float> (align 4) from %x + 4 bytes, stores
; it to %y and returns %z.
; Expected output: the same pre-indexed vldrw.u32 `[r0, #4]!` that the
; <4 x i32> case expects.
956define i8* @ldrwf32_4(i8* %x, i8* %y) {
957; CHECK-LABEL: ldrwf32_4:
958; CHECK:       @ %bb.0: @ %entry
959; CHECK-NEXT:    vldrw.u32 q0, [r0, #4]!
960; CHECK-NEXT:    vstrw.32 q0, [r1]
961; CHECK-NEXT:    bx lr
962entry:
963  %z = getelementptr inbounds i8, i8* %x, i32 4
964  %0 = bitcast i8* %z to <4 x float>*
965  %1 = load <4 x float>, <4 x float>* %0, align 4
966  %2 = bitcast i8* %y to <4 x float>*
967  store <4 x float> %1, <4 x float>* %2, align 4
968  ret i8* %z
969}
970
; Half-precision variant: loads <8 x half> (align 2) from %x + 4 bytes, stores
; it to %y and returns %z.
; Expected output: pre-indexed vldrh.u16 `[r0, #4]!`.
; NOTE(review): the function name says "ldrw", but the expected instruction is
; the halfword form (vldrh); the name is kept so the labels stay stable.
971define i8* @ldrwf16_4(i8* %x, i8* %y) {
972; CHECK-LABEL: ldrwf16_4:
973; CHECK:       @ %bb.0: @ %entry
974; CHECK-NEXT:    vldrh.u16 q0, [r0, #4]!
975; CHECK-NEXT:    vstrh.16 q0, [r1]
976; CHECK-NEXT:    bx lr
977entry:
978  %z = getelementptr inbounds i8, i8* %x, i32 4
979  %0 = bitcast i8* %z to <8 x half>*
980  %1 = load <8 x half>, <8 x half>* %0, align 2
981  %2 = bitcast i8* %y to <8 x half>*
982  store <8 x half> %1, <8 x half>* %2, align 2
983  ret i8* %z
984}
985
; Under-aligned case: loads <4 x i32> with only align 1 from %x + 3 bytes,
; stores it to %y (align 4) and returns %z.
; Expected output differs per run: the little-endian run uses a pre-indexed
; byte load (vldrb.u8 `[r0, #3]!`); the big-endian run uses a byte load with
; an immediate offset but no writeback, a separate `adds`, and a vrev32.8
; before the store.
; NOTE(review): the vrev32.8 presumably restores big-endian lane byte order
; after the byte-wise load - confirm.
986define i8* @ldrwi32_align1(i8* %x, i8* %y) {
987; CHECK-LE-LABEL: ldrwi32_align1:
988; CHECK-LE:       @ %bb.0: @ %entry
989; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
990; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
991; CHECK-LE-NEXT:    bx lr
992;
993; CHECK-BE-LABEL: ldrwi32_align1:
994; CHECK-BE:       @ %bb.0: @ %entry
995; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
996; CHECK-BE-NEXT:    adds r0, #3
997; CHECK-BE-NEXT:    vrev32.8 q0, q0
998; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
999; CHECK-BE-NEXT:    bx lr
1000entry:
1001  %z = getelementptr inbounds i8, i8* %x, i32 3
1002  %0 = bitcast i8* %z to <4 x i32>*
1003  %1 = load <4 x i32>, <4 x i32>* %0, align 1
1004  %2 = bitcast i8* %y to <4 x i32>*
1005  store <4 x i32> %1, <4 x i32>* %2, align 4
1006  ret i8* %z
1007}
1008
; <8 x i16> load with only byte alignment (align 1) from %x + 3.
; Little-endian expects a pre-indexed vldrb.u8 with writeback ([r0, #3]!).
; Big-endian expects an offset vldrb.u8 without writeback, a separate adds to
; form the returned pointer, and a vrev16.8 on the loaded value.
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrhi16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrhi16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 1
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1031
; <4 x i16> load (align 1) from %x + 3, sign-extended to <4 x i32> and stored
; to %y. The expected output copies the 8 source bytes through an 8-byte stack
; slot with two scalar ldr/str pairs (the first ldr carries the #3 writeback)
; and then performs the extending vldrh.s32 from the stack copy.
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    ldr r2, [r0, #3]!
; CHECK-NEXT:    str r2, [sp]
; CHECK-NEXT:    ldr r2, [r0, #4]
; CHECK-NEXT:    str r2, [sp, #4]
; CHECK-NEXT:    mov r2, sp
; CHECK-NEXT:    vldrh.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 1
  %2 = bitcast i8* %y to <4 x i32>*
  %3 = sext <4 x i16> %1 to <4 x i32>
  store <4 x i32> %3, <4 x i32>* %2, align 4
  ret i8* %z
}
1055
; <4 x float> load with only byte alignment (align 1) from %x + 3.
; Little-endian expects a pre-indexed vldrb.u8 with writeback ([r0, #3]!).
; Big-endian expects an offset vldrb.u8 without writeback, a separate adds to
; form the returned pointer, and a vrev32.8 on the loaded value.
define i8* @ldrf32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 1
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %z
}
1078
; <8 x half> load with only byte alignment (align 1) from %x + 3.
; Little-endian expects a pre-indexed vldrb.u8 with writeback ([r0, #3]!).
; Big-endian expects an offset vldrb.u8 without writeback, a separate adds to
; form the returned pointer, and a vrev16.8 on the loaded value.
define i8* @ldrf16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 1
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %z
}
1101
; <8 x i16> load marked align 8 (more than the element size) from %x + 4.
; Both endiannesses expect a pre-indexed load with writeback ([r0, #4]!);
; little-endian uses vldrw.u32 for it while big-endian uses vldrh.u16.
define i8* @ldrh16_align8(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrh16_align8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #4]!
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrh16_align8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r0, #4]!
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 8
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1122
1123
1124
1125
1126
; <4 x i32> store (align 4) to %y + 4; the offset pointer is returned.
; Both endiannesses expect a pre-indexed store with writeback ([r0, #4]!);
; little-endian emits it as vstrb.8 while big-endian emits vstrw.32.
define i8* @strw32_4(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strw32_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}
1147
; <4 x i32> store (align 4 in the IR) to %y + 3, an offset that is not a
; multiple of the element size.
; Little-endian expects a pre-indexed vstrb.8 with writeback ([r0, #3]!).
; Big-endian expects a separate adds on r0 followed by a vstrw.32 with no
; writeback.
define i8* @strw32_3(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strw32_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}
1169
; <4 x i32> store (align 4) to %y - 4; the offset pointer is returned.
; Both endiannesses expect a pre-indexed store with writeback ([r0, #-4]!);
; little-endian emits it as vstrb.8 while big-endian emits vstrw.32.
define i8* @strw32_m4(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strw32_m4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #-4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_m4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #-4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -4
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}
1190
; <4 x i32> store (align 4) to %y + 508; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrw.32 with
; writeback ([r0, #508]!); compare with the +512 case, which is not folded.
define i8* @strw32_508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #508]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 508
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}
1205
; <4 x i32> store (align 4) to %y + 512; the offset pointer is returned.
; The expected output adds 512 to r0 with a separate add.w and stores with no
; writeback, i.e. this offset is not folded into the vstrw.
define i8* @strw32_512(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 512
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}
1221
; <4 x i32> store (align 4) to %y - 508; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrw.32 with
; writeback ([r0, #-508]!); compare with the -512 case, which is not folded.
define i8* @strw32_m508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #-508]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -508
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}
1236
; <4 x i32> store (align 4) to %y - 512; the offset pointer is returned.
; The expected output subtracts 512 from r0 with a separate sub.w and stores
; with no writeback, i.e. this offset is not folded into the vstrw.
define i8* @strw32_m512(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -512
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}
1252
1253
; <4 x i16> copy (align 2) from %x to %y + 4; the offset pointer is returned.
; The expected output loads with vldrh.u32 and stores with a pre-indexed
; vstrh.32 with writeback ([r0, #4]!).
define i8* @strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %z
}
1268
; <4 x i16> copy (align 2) from %x to %y + 3, an odd byte offset.
; The expected output adds 3 to r0 with a separate adds and stores with a
; vstrh.32 that has no writeback, i.e. this offset is not folded.
define i8* @strh32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #3
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %z
}
1284
; <4 x i16> copy (align 2) from %x to %y + 2; the offset pointer is returned.
; The expected output stores with a pre-indexed vstrh.32 with writeback
; ([r0, #2]!).
define i8* @strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #2]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 2
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %z
}
1299
; <4 x i16> copy (align 2) from %x to %y + 254; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrh.32 with
; writeback ([r0, #254]!); compare with the +256 case, which is not folded.
define i8* @strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %z
}
1314
; <4 x i16> copy (align 2) from %x to %y + 256; the offset pointer is returned.
; The expected output adds 256 to r0 with a separate add.w and stores with no
; writeback, i.e. this offset is not folded into the vstrh.
define i8* @strh32_256(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %z
}
1330
; <4 x i16> copy (align 2) from %x to %y - 254; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrh.32 with
; writeback ([r0, #-254]!); compare with the -256 case, which is not folded.
define i8* @strh32_m254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #-254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %z
}
1345
; <4 x i16> copy (align 2) from %x to %y - 256; the offset pointer is returned.
; The expected output subtracts 256 from r0 with a separate sub.w and stores
; with no writeback, i.e. this offset is not folded into the vstrh.
define i8* @strh32_m256(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %z
}
1361
1362
; <8 x i16> store (align 2) to %y + 4; the offset pointer is returned.
; Both endiannesses expect a pre-indexed store with writeback ([r0, #4]!);
; little-endian emits it as vstrb.8 while big-endian emits vstrh.16.
define i8* @strh16_4(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strh16_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strh16_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1383
; <8 x i16> store (align 2 in the IR) to %y + 3, an odd byte offset.
; Little-endian expects a pre-indexed vstrb.8 with writeback ([r0, #3]!).
; Big-endian expects a separate adds on r0 followed by a vstrh.16 with no
; writeback.
define i8* @strh16_3(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strh16_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strh16_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1405
; <8 x i16> store (align 2) to %y + 2; the offset pointer is returned.
; Both endiannesses expect a pre-indexed store with writeback ([r0, #2]!);
; little-endian emits it as vstrb.8 while big-endian emits vstrh.16.
define i8* @strh16_2(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strh16_2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #2]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strh16_2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #2]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 2
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1426
; <8 x i16> store (align 2) to %y + 254; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrh.16 with
; writeback ([r0, #254]!); compare with the +256 case, which is not folded.
define i8* @strh16_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 254
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1441
; <8 x i16> store (align 2) to %y + 256; the offset pointer is returned.
; The expected output adds 256 to r0 with a separate add.w and stores with no
; writeback, i.e. this offset is not folded into the vstrh.
define i8* @strh16_256(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 256
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1457
; <8 x i16> store (align 2) to %y - 254; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrh.16 with
; writeback ([r0, #-254]!); compare with the -256 case, which is not folded.
define i8* @strh16_m254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #-254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -254
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1472
; <8 x i16> store (align 2) to %y - 256; the offset pointer is returned.
; The expected output subtracts 256 from r0 with a separate sub.w and stores
; with no writeback, i.e. this offset is not folded into the vstrh.
define i8* @strh16_m256(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -256
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %z
}
1488
1489
; <4 x i8> copy (align 1) from %x to %y + 4; the offset pointer is returned.
; The expected output loads with vldrb.u32 and stores with a pre-indexed
; vstrb.32 with writeback ([r0, #4]!).
define i8* @strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %z
}
1504
; <4 x i8> copy (align 1) from %x to %y + 3; the offset pointer is returned.
; With byte elements the odd offset still folds: the expected output stores
; with a pre-indexed vstrb.32 with writeback ([r0, #3]!).
define i8* @strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #3]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %z
}
1519
; <4 x i8> copy (align 1) from %x to %y + 127; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrb.32 with
; writeback ([r0, #127]!); compare with the +128 case, which is not folded.
define i8* @strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %z
}
1534
; <4 x i8> copy (align 1) from %x to %y + 128; the offset pointer is returned.
; The expected output adds 128 to r0 with a separate adds and stores with no
; writeback, i.e. this offset is not folded into the vstrb.
define i8* @strb32_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %z
}
1550
; <4 x i8> copy (align 1) from %x to %y - 127; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrb.32 with
; writeback ([r0, #-127]!); compare with the -128 case, which is not folded.
define i8* @strb32_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #-127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %z
}
1565
; <4 x i8> copy (align 1) from %x to %y - 128; the offset pointer is returned.
; The expected output subtracts 128 from r0 with a separate subs and stores
; with no writeback, i.e. this offset is not folded into the vstrb.
define i8* @strb32_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %z
}
1581
1582
; <8 x i8> copy (align 1) from %x to %y + 4; the offset pointer is returned.
; The expected output loads with vldrb.u16 and stores with a pre-indexed
; vstrb.16 with writeback ([r0, #4]!).
define i8* @strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %z
}
1597
; <8 x i8> copy (align 1) from %x to %y + 3; the offset pointer is returned.
; With byte elements the odd offset still folds: the expected output stores
; with a pre-indexed vstrb.16 with writeback ([r0, #3]!).
define i8* @strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #3]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %z
}
1612
; <8 x i8> copy (align 1) from %x to %y + 127; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrb.16 with
; writeback ([r0, #127]!); compare with the +128 case, which is not folded.
define i8* @strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %z
}
1627
; <8 x i8> copy (align 1) from %x to %y + 128; the offset pointer is returned.
; The expected output adds 128 to r0 with a separate adds and stores with no
; writeback, i.e. this offset is not folded into the vstrb.
define i8* @strb16_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %z
}
1643
; <8 x i8> copy (align 1) from %x to %y - 127; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrb.16 with
; writeback ([r0, #-127]!); compare with the -128 case, which is not folded.
define i8* @strb16_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #-127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %z
}
1658
; <8 x i8> copy (align 1) from %x to %y - 128; the offset pointer is returned.
; The expected output subtracts 128 from r0 with a separate subs and stores
; with no writeback, i.e. this offset is not folded into the vstrb.
define i8* @strb16_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %z
}
1674
1675
; <16 x i8> store (align 1) to %y + 4; the offset pointer is returned.
; The expected output stores with a pre-indexed vstrb.8 with writeback
; ([r0, #4]!).
define i8* @strb8_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}
1690
; <16 x i8> store (align 1) to %y + 3; the offset pointer is returned.
; The expected output stores with a pre-indexed vstrb.8 with writeback
; ([r0, #3]!).
define i8* @strb8_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}
1705
; <16 x i8> store (align 1) to %y + 127; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrb.8 with
; writeback ([r0, #127]!); compare with the +128 case, which is not folded.
define i8* @strb8_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 127
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}
1720
; <16 x i8> store (align 1) to %y + 128; the offset pointer is returned.
; The expected output adds 128 to r0 with a separate adds and stores with no
; writeback, i.e. this offset is not folded into the vstrb.
define i8* @strb8_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 128
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}
1736
; <16 x i8> store (align 1) to %y - 127; the offset pointer is returned.
; The expected output folds the offset into a pre-indexed vstrb.8 with
; writeback ([r0, #-127]!); compare with the -128 case, which is not folded.
define i8* @strb8_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #-127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -127
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}
1751
; <16 x i8> store (align 1) to %y - 128; the offset pointer is returned.
; The expected output subtracts 128 from r0 with a separate subs and stores
; with no writeback, i.e. this offset is not folded into the vstrb.
define i8* @strb8_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -128
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %z
}
1767
1768
; <4 x float> store (align 4) to %y + 4; the offset pointer is returned.
; Both endiannesses expect a pre-indexed store with writeback ([r0, #4]!);
; little-endian emits it as vstrb.8 while big-endian emits vstrw.32.
define i8* @strf32_4(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf32_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf32_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  %2 = bitcast i8* %z to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %z
}
1789
; <8 x half> store (align 2) to %y + 4; the offset pointer is returned.
; Both endiannesses expect a pre-indexed store with writeback ([r0, #4]!);
; little-endian emits it as vstrb.8 while big-endian emits vstrh.16.
define i8* @strf16_4(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf16_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast i8* %z to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %z
}
1810
; <4 x i32> store with only byte alignment (align 1) to %y + 3.
; Little-endian expects a pre-indexed vstrb.8 with writeback ([r0, #3]!).
; Big-endian expects a vrev32.8 on the value, an offset vstrb.8 without
; writeback, and a separate adds to form the returned pointer.
define i8* @strwi32_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strwi32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strwi32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 1
  ret i8* %z
}
1833
; <8 x i16> store with only byte alignment (align 1) to %y + 3.
; Little-endian expects a pre-indexed vstrb.8 with writeback ([r0, #3]!).
; Big-endian expects a vrev16.8 on the value, an offset vstrb.8 without
; writeback, and a separate adds to form the returned pointer.
define i8* @strhi16_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strhi16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strhi16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 1
  ret i8* %z
}
1856
; <4 x i32> load (align 4) from %x, truncated to <4 x i16> and stored with
; only byte alignment (align 1) to %y + 3. The expected output does the
; narrowing vstrh.32 into an 8-byte stack slot, reloads it with ldrd, and
; writes it out with two scalar str instructions (the first carries the #3
; writeback).
define i8* @strhi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    ldrd r1, r2, [sp]
; CHECK-NEXT:    str r1, [r0, #3]!
; CHECK-NEXT:    str r2, [r0, #4]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i16>*
  %3 = trunc <4 x i32> %1 to <4 x i16>
  store <4 x i16> %3, <4 x i16>* %2, align 1
  ret i8* %z
}
1879
; <4 x float> store with only byte alignment (align 1) to %y + 3.
; Little-endian expects a pre-indexed vstrb.8 with writeback ([r0, #3]!).
; Big-endian expects a vrev32.8 on the value, an offset vstrb.8 without
; writeback, and a separate adds to form the returned pointer.
define i8* @strf32_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  %2 = bitcast i8* %z to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 1
  ret i8* %z
}
1902
; <8 x half> store with only byte alignment (align 1) to %y + 3.
; Little-endian expects a pre-indexed vstrb.8 with writeback ([r0, #3]!).
; Big-endian expects a vrev16.8 on the value, an offset vstrb.8 without
; writeback, and a separate adds to form the returned pointer.
define i8* @strf16_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast i8* %z to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 1
  ret i8* %z
}
1925
; Store marked align 8 (more than the element size) to %y + 16.
; Both endiannesses expect a pre-indexed store with writeback ([r0, #16]!);
; little-endian emits it as vstrb.8 while big-endian emits vstrh.16.
; NOTE(review): despite "f16" in the name, the IR below operates on <8 x i16>,
; not <8 x half>. The name is left unchanged because the label directives
; depend on it -- confirm whether a half-typed variant was intended.
define i8* @strf16_align8(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf16_align8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #16]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_align8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #16]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 16
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 8
  ret i8* %z
}
1946