1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
4
; Loads <4 x i32> (align 4) from %x + 4 bytes and stores it to %y.
; Expected: the +4 offset is folded into the vldrw.u32 immediate.
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
19
; Loads <4 x i32> (align 4) from %x + 3 bytes and stores it to %y.
; Expected: the address is formed by a separate `adds` and vldrw.u32 uses
; no immediate offset.
define i8* @ldrwu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r2, r0, #3
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
35
; Loads <4 x i32> (align 4) from %x - 4 bytes and stores it to %y.
; Expected: the -4 offset is folded into the vldrw.u32 immediate.
define i8* @ldrwu32_m4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #-4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -4
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
50
; Loads <4 x i32> (align 4) from %x + 508 bytes and stores it to %y.
; Expected: the +508 offset is folded into the vldrw.u32 immediate.
define i8* @ldrwu32_508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #508]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 508
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
65
; Loads <4 x i32> (align 4) from %x + 512 bytes and stores it to %y.
; Expected: the address is formed by a separate `add.w` and vldrw.u32 uses
; no immediate offset.
define i8* @ldrwu32_512(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 512
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
81
; Loads <4 x i32> (align 4) from %x - 508 bytes and stores it to %y.
; Expected: the -508 offset is folded into the vldrw.u32 immediate.
define i8* @ldrwu32_m508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #-508]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -508
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
96
; Loads <4 x i32> (align 4) from %x - 512 bytes and stores it to %y.
; Expected: the address is formed by a separate `sub.w` and vldrw.u32 uses
; no immediate offset.
define i8* @ldrwu32_m512(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -512
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
112
113
; Loads <4 x i16> (align 2) from %x + 4 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.u32 with the +4 offset as its immediate.
define i8* @ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
129
; Loads <4 x i16> (align 2) from %x + 3 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `adds` and vldrh.u32 uses
; no immediate offset.
define i8* @ldrhu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r2, r0, #3
; CHECK-NEXT:    vldrh.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
146
; Loads <4 x i16> (align 2) from %x + 2 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.u32 with the +2 offset as its immediate.
define i8* @ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
162
; Loads <4 x i16> (align 2) from %x + 254 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.u32 with the +254 offset as its immediate.
define i8* @ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
178
; Loads <4 x i16> (align 2) from %x + 256 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `add.w` and vldrh.u32 uses
; no immediate offset.
define i8* @ldrhu32_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
195
; Loads <4 x i16> (align 2) from %x - 254 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.u32 with the -254 offset as its immediate.
define i8* @ldrhu32_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #-254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
211
; Loads <4 x i16> (align 2) from %x - 256 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `sub.w` and vldrh.u32 uses
; no immediate offset.
define i8* @ldrhu32_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
228
229
; Loads <4 x i16> (align 2) from %x + 4 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.s32 with the +4 offset as its immediate.
define i8* @ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
245
; Loads <4 x i16> (align 2) from %x + 3 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `adds` and vldrh.s32 uses
; no immediate offset.
define i8* @ldrhs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r2, r0, #3
; CHECK-NEXT:    vldrh.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
262
; Loads <4 x i16> (align 2) from %x + 2 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.s32 with the +2 offset as its immediate.
define i8* @ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
278
; Loads <4 x i16> (align 2) from %x + 254 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.s32 with the +254 offset as its immediate.
define i8* @ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
294
; Loads <4 x i16> (align 2) from %x + 256 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `add.w` and vldrh.s32 uses
; no immediate offset.
define i8* @ldrhs32_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #256
; CHECK-NEXT:    vldrh.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
311
; Loads <4 x i16> (align 2) from %x - 254 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrh.s32 with the -254 offset as its immediate.
define i8* @ldrhs32_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #-254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
327
; Loads <4 x i16> (align 2) from %x - 256 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `sub.w` and vldrh.s32 uses
; no immediate offset.
define i8* @ldrhs32_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #256
; CHECK-NEXT:    vldrh.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
344
345
; Loads <8 x i16> (align 2) from %x + 4 bytes and stores it to %y.
; Expected: the +4 offset is folded into the vldrh.u16 immediate.
define i8* @ldrhu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #4]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
360
; Loads <8 x i16> (align 2) from %x + 3 bytes and stores it to %y.
; Expected: the address is formed by a separate `adds` and vldrh.u16 uses
; no immediate offset.
define i8* @ldrhu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r2, r0, #3
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
376
; Loads <8 x i16> (align 2) from %x + 2 bytes and stores it to %y.
; Expected: the +2 offset is folded into the vldrh.u16 immediate.
define i8* @ldrhu16_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 2
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
391
; Loads <8 x i16> (align 2) from %x + 254 bytes and stores it to %y.
; Expected: the +254 offset is folded into the vldrh.u16 immediate.
define i8* @ldrhu16_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #254]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 254
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
406
; Loads <8 x i16> (align 2) from %x + 256 bytes and stores it to %y.
; Expected: the address is formed by a separate `add.w` and vldrh.u16 uses
; no immediate offset.
define i8* @ldrhu16_256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 256
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
422
; Loads <8 x i16> (align 2) from %x - 254 bytes and stores it to %y.
; Expected: the -254 offset is folded into the vldrh.u16 immediate.
define i8* @ldrhu16_m254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #-254]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -254
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
437
; Loads <8 x i16> (align 2) from %x - 256 bytes and stores it to %y.
; Expected: the address is formed by a separate `sub.w` and vldrh.u16 uses
; no immediate offset.
define i8* @ldrhu16_m256(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -256
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
453
454
; Loads <4 x i8> (align 1) from %x + 4 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.u32 with the +4 offset as its immediate.
define i8* @ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
470
; Loads <4 x i8> (align 1) from %x + 3 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.u32 with the +3 offset as its immediate.
define i8* @ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
486
; Loads <4 x i8> (align 1) from %x + 127 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.u32 with the +127 offset as its immediate.
define i8* @ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
502
; Loads <4 x i8> (align 1) from %x + 128 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `add.w` and vldrb.u32 uses
; no immediate offset.
define i8* @ldrbu32_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
519
; Loads <4 x i8> (align 1) from %x - 127 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.u32 with the -127 offset as its immediate.
define i8* @ldrbu32_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #-127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
535
; Loads <4 x i8> (align 1) from %x - 128 bytes, zero-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `sub.w` and vldrb.u32 uses
; no immediate offset.
define i8* @ldrbu32_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
552
553
; Loads <4 x i8> (align 1) from %x + 4 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.s32 with the +4 offset as its immediate.
define i8* @ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
569
; Loads <4 x i8> (align 1) from %x + 3 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.s32 with the +3 offset as its immediate.
define i8* @ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
585
; Loads <4 x i8> (align 1) from %x + 127 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.s32 with the +127 offset as its immediate.
define i8* @ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
601
; Loads <4 x i8> (align 1) from %x + 128 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `add.w` and vldrb.s32 uses
; no immediate offset.
define i8* @ldrbs32_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #128
; CHECK-NEXT:    vldrb.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
618
; Loads <4 x i8> (align 1) from %x - 127 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: a single vldrb.s32 with the -127 offset as its immediate.
define i8* @ldrbs32_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #-127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
634
; Loads <4 x i8> (align 1) from %x - 128 bytes, sign-extends to <4 x i32>
; and stores the result to %y.
; Expected: the address is formed by a separate `sub.w` and vldrb.s32 uses
; no immediate offset.
define i8* @ldrbs32_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #128
; CHECK-NEXT:    vldrb.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  %3 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %2, <4 x i32>* %3, align 4
  ret i8* %x
}
651
652
; Loads <8 x i8> (align 1) from %x + 4 bytes, zero-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.u16 with the +4 offset as its immediate.
define i8* @ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #4]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
668
; Loads <8 x i8> (align 1) from %x + 3 bytes, zero-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.u16 with the +3 offset as its immediate.
define i8* @ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
684
; Loads <8 x i8> (align 1) from %x + 127 bytes, zero-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.u16 with the +127 offset as its immediate.
define i8* @ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
700
; Loads <8 x i8> (align 1) from %x + 128 bytes, zero-extends to <8 x i16>
; and stores the result to %y.
; Expected: the address is formed by a separate `add.w` and vldrb.u16 uses
; no immediate offset.
define i8* @ldrbu16_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
717
; Loads <8 x i8> (align 1) from %x - 127 bytes, zero-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.u16 with the -127 offset as its immediate.
define i8* @ldrbu16_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #-127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
733
; Loads <8 x i8> (align 1) from %x - 128 bytes, zero-extends to <8 x i16>
; and stores the result to %y.
; Expected: the address is formed by a separate `sub.w` and vldrb.u16 uses
; no immediate offset.
define i8* @ldrbu16_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
750
751
; Loads <8 x i8> (align 1) from %x + 4 bytes, sign-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.s16 with the +4 offset as its immediate.
define i8* @ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #4]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
767
; Loads <8 x i8> (align 1) from %x + 3 bytes, sign-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.s16 with the +3 offset as its immediate.
define i8* @ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
783
; Loads <8 x i8> (align 1) from %x + 127 bytes, sign-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.s16 with the +127 offset as its immediate.
define i8* @ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
799
; Loads <8 x i8> (align 1) from %x + 128 bytes, sign-extends to <8 x i16>
; and stores the result to %y.
; Expected: the address is formed by a separate `add.w` and vldrb.s16 uses
; no immediate offset.
define i8* @ldrbs16_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #128
; CHECK-NEXT:    vldrb.s16 q0, [r2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
816
; Loads <8 x i8> (align 1) from %x - 127 bytes, sign-extends to <8 x i16>
; and stores the result to %y.
; Expected: a single vldrb.s16 with the -127 offset as its immediate.
define i8* @ldrbs16_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #-127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
832
; Loads <8 x i8> (align 1) from %x - 128 bytes, sign-extends to <8 x i16>
; and stores the result to %y.
; Expected: the address is formed by a separate `sub.w` and vldrb.s16 uses
; no immediate offset.
define i8* @ldrbs16_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #128
; CHECK-NEXT:    vldrb.s16 q0, [r2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  %3 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %2, <8 x i16>* %3, align 2
  ret i8* %x
}
849
850
; Loads <16 x i8> (align 1) from %x + 4 bytes and stores it to %y.
; Expected: the +4 offset is folded into the vldrb.u8 immediate.
define i8* @ldrbu8_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #4]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %x
}
865
; Loads <16 x i8> (align 1) from %x + 3 bytes and stores it to %y.
; Expected: the +3 offset is folded into the vldrb.u8 immediate.
define i8* @ldrbu8_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %x
}
880
; Loads <16 x i8> (align 1) from %x + 127 bytes and stores it to %y.
; Expected: the +127 offset is folded into the vldrb.u8 immediate.
define i8* @ldrbu8_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #127]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 127
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %x
}
895
; Loads <16 x i8> (align 1) from %x + 128 bytes and stores it to %y.
; Expected: the address is formed by a separate `add.w` and vldrb.u8 uses
; no immediate offset.
define i8* @ldrbu8_128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r2, r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r2]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 128
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %x
}
911
; Loads <16 x i8> (align 1) from %x - 127 bytes and stores it to %y.
; Expected: the -127 offset is folded into the vldrb.u8 immediate.
define i8* @ldrbu8_m127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #-127]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -127
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %x
}
926
; Loads <16 x i8> (align 1) from %x - 128 bytes and stores it to %y.
; Expected: the address is formed by a separate `sub.w` and vldrb.u8 uses
; no immediate offset.
define i8* @ldrbu8_m128(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r2, r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r2]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 -128
  %0 = bitcast i8* %z to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %x
}
942
943
; Loads <4 x float> (align 4) from %x + 4 bytes and stores it to %y.
; Expected: the same code as for an integer word vector, i.e. vldrw.u32
; with the +4 offset as its immediate followed by vstrw.32.
define i8* @ldrwf32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %x
}
958
; Loads <8 x half> (align 2) from %x + 4 bytes and stores it to %y.
; Expected: halfword vector accesses, i.e. vldrh.u16 with the +4 offset as
; its immediate followed by vstrh.16.
define i8* @ldrwf16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #4]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %x
}
973
; Loads <4 x i32> with only byte alignment (align 1) from %x + 3 bytes and
; stores it to %y with align 4.
; Expected (little-endian): the load is done as vldrb.u8 with the +3 offset
; as its immediate. Expected (big-endian): the same load followed by a
; vrev32.8 before the vstrw.32.
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrwi32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrwi32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 1
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %x
}
995
; Under-aligned halfword load: reads <8 x i16> from %x+3 with align 1, stores
; it to %y with align 2, and returns %x.
; Expected output: both runs load with a byte-wide vldrb.u8 and the #3 offset
; folded in; the big-endian run additionally emits vrev16.8 before the
; halfword store.
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrhi16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrhi16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 1
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
1017
; Under-aligned extending load: reads <4 x i16> from %x+3 with align 1,
; sign-extends it to <4 x i32>, stores the result to %y (align 4), and
; returns %x.
; Expected output (same for both endiannesses): the 8 source bytes are copied
; with two scalar ldr.w loads (offsets #3 and #7) into an 8-byte stack slot,
; and the sign-extending vldrh.s32 then reads from that slot.
define i8* @ldrhi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    ldr.w r3, [r0, #7]
; CHECK-NEXT:    ldr.w r2, [r0, #3]
; CHECK-NEXT:    strd r2, r3, [sp]
; CHECK-NEXT:    mov r2, sp
; CHECK-NEXT:    vldrh.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 1
  %2 = bitcast i8* %y to <4 x i32>*
  %3 = sext <4 x i16> %1 to <4 x i32>
  store <4 x i32> %3, <4 x i32>* %2, align 4
  ret i8* %x
}
1040
; Under-aligned float load: reads <4 x float> from %x+3 with align 1, stores
; it to %y with align 4, and returns %x.
; Expected output: both runs load with a byte-wide vldrb.u8 and the #3 offset
; folded in; the big-endian run additionally emits vrev32.8 before the word
; store.
define i8* @ldrf32_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 1
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %x
}
1062
; Under-aligned half-precision load: reads <8 x half> from %x+3 with align 1,
; stores it to %y with align 2, and returns %x.
; Expected output: both runs load with a byte-wide vldrb.u8 and the #3 offset
; folded in; the big-endian run additionally emits vrev16.8 before the
; halfword store.
define i8* @ldrf16_align1(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrf16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 3
  %0 = bitcast i8* %z to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 1
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %x
}
1084
; Over-aligned halfword load: reads <8 x i16> from %x+4 with align 8, stores
; it to %y with align 2, and returns %x.
; Expected output: the little-endian run performs the load as vldrw.u32 while
; the big-endian run keeps vldrh.u16; both fold the #4 offset and neither
; emits a lane reversal.
; NOTE(review): presumably the extra alignment is what allows the wider load
; on little-endian - confirm against the backend's load selection.
define i8* @ldrh16_align8(i8* %x, i8* %y) {
; CHECK-LE-LABEL: ldrh16_align8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #4]
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrh16_align8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r0, #4]
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 8
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %x
}
1105
1106
1107
1108
1109
; Word store with a small positive offset: loads <4 x i32> from %x, stores it
; to %y+4 (align 4), and returns %y.
; Expected output: the +4 byte offset is folded into the vstrw.32 addressing
; mode.
define i8* @strw32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %y
}
1124
; Word store with an odd offset: loads <4 x i32> from %x, stores it to %y+3,
; and returns %y.
; Expected output: the address is formed by a separate `adds r1, r0, #3` and
; the vstrw.32 uses no immediate.
; NOTE(review): presumably 3 cannot be encoded because it is not a multiple of
; the 4-byte element size - confirm against the MVE addressing rules.
define i8* @strw32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adds r1, r0, #3
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %y
}
1140
; Word store with a small negative offset: loads <4 x i32> from %x, stores it
; to %y-4, and returns %y.
; Expected output: the -4 byte offset is folded into the vstrw.32 addressing
; mode.
define i8* @strw32_m4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #-4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -4
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %y
}
1155
; Word store at offset +508: loads <4 x i32> from %x, stores it to %y+508,
; and returns %y.
; Expected output: the #508 offset is still folded into the vstrw.32
; addressing mode.
define i8* @strw32_508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #508]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 508
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %y
}
1170
; Word store at offset +512: loads <4 x i32> from %x, stores it to %y+512,
; and returns %y.
; Expected output: the address is formed by a separate `add.w r1, r0, #512`
; and the vstrw.32 uses no immediate.
; NOTE(review): presumably 512 is just past the largest encodable word
; offset - confirm against the MVE addressing rules.
define i8* @strw32_512(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #512
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 512
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %y
}
1186
; Word store at offset -508: loads <4 x i32> from %x, stores it to %y-508,
; and returns %y.
; Expected output: the #-508 offset is still folded into the vstrw.32
; addressing mode.
define i8* @strw32_m508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #-508]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -508
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %y
}
1201
; Word store at offset -512: loads <4 x i32> from %x, stores it to %y-512,
; and returns %y.
; Expected output: the address is formed by a separate `sub.w r1, r0, #512`
; and the vstrw.32 uses no immediate.
; NOTE(review): presumably -512 is just past the most negative encodable word
; offset - confirm against the MVE addressing rules.
define i8* @strw32_m512(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #512
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -512
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %y
}
1217
1218
; Halfword store from 32-bit lanes: loads <4 x i16> from %x, stores it to
; %y+4 (align 2), and returns %y.
; Expected output: the data is loaded with vldrh.u32 and stored with vstrh.32,
; with the +4 byte offset folded into the store's addressing mode.
define i8* @strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %y
}
1233
; Halfword store (32-bit lanes) with an odd offset: loads <4 x i16> from %x,
; stores it to %y+3, and returns %y.
; Expected output: the address is formed by a separate `adds r1, r0, #3` and
; the vstrh.32 uses no immediate.
; NOTE(review): presumably 3 cannot be encoded because it is not a multiple of
; the 2-byte element size - confirm against the MVE addressing rules.
define i8* @strh32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    adds r1, r0, #3
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %y
}
1249
; Halfword store (32-bit lanes) at offset +2: loads <4 x i16> from %x, stores
; it to %y+2, and returns %y.
; Expected output: the #2 offset is folded into the vstrh.32 addressing mode.
define i8* @strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #2]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 2
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %y
}
1264
; Halfword store (32-bit lanes) at offset +254: loads <4 x i16> from %x,
; stores it to %y+254, and returns %y.
; Expected output: the #254 offset is still folded into the vstrh.32
; addressing mode.
define i8* @strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #254]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %y
}
1279
; Halfword store (32-bit lanes) at offset +256: loads <4 x i16> from %x,
; stores it to %y+256, and returns %y.
; Expected output: the address is formed by a separate `add.w r1, r0, #256`
; and the vstrh.32 uses no immediate.
; NOTE(review): presumably 256 is just past the largest encodable halfword
; offset - confirm against the MVE addressing rules.
define i8* @strh32_256(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #256
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %y
}
1295
; Halfword store (32-bit lanes) at offset -254: loads <4 x i16> from %x,
; stores it to %y-254, and returns %y.
; Expected output: the #-254 offset is still folded into the vstrh.32
; addressing mode.
define i8* @strh32_m254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #-254]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -254
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %y
}
1310
; Halfword store (32-bit lanes) at offset -256: loads <4 x i16> from %x,
; stores it to %y-256, and returns %y.
; Expected output: the address is formed by a separate `sub.w r1, r0, #256`
; and the vstrh.32 uses no immediate.
; NOTE(review): presumably -256 is just past the most negative encodable
; halfword offset - confirm against the MVE addressing rules.
define i8* @strh32_m256(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #256
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -256
  %0 = bitcast i8* %x to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = bitcast i8* %z to <4 x i16>*
  store <4 x i16> %1, <4 x i16>* %2, align 2
  ret i8* %y
}
1326
1327
; Full-width halfword store: loads <8 x i16> from %x, stores it to %y+4
; (align 2), and returns %y.
; Expected output: the +4 byte offset is folded into the vstrh.16 addressing
; mode.
define i8* @strh16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %y
}
1342
; Full-width halfword store with an odd offset: loads <8 x i16> from %x,
; stores it to %y+3, and returns %y.
; Expected output: the address is formed by a separate `adds r1, r0, #3` and
; the vstrh.16 uses no immediate.
; NOTE(review): presumably 3 cannot be encoded because it is not a multiple of
; the 2-byte element size - confirm against the MVE addressing rules.
define i8* @strh16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    adds r1, r0, #3
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %y
}
1358
; Full-width halfword store at offset +2: loads <8 x i16> from %x, stores it
; to %y+2, and returns %y.
; Expected output: the #2 offset is folded into the vstrh.16 addressing mode.
define i8* @strh16_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #2]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 2
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %y
}
1373
; Full-width halfword store at offset +254: loads <8 x i16> from %x, stores
; it to %y+254, and returns %y.
; Expected output: the #254 offset is still folded into the vstrh.16
; addressing mode.
define i8* @strh16_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #254]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 254
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %y
}
1388
; Full-width halfword store at offset +256: loads <8 x i16> from %x, stores
; it to %y+256, and returns %y.
; Expected output: the address is formed by a separate `add.w r1, r0, #256`
; and the vstrh.16 uses no immediate.
; NOTE(review): presumably 256 is just past the largest encodable halfword
; offset - confirm against the MVE addressing rules.
define i8* @strh16_256(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #256
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 256
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %y
}
1404
; Full-width halfword store at offset -254: loads <8 x i16> from %x, stores
; it to %y-254, and returns %y.
; Expected output: the #-254 offset is still folded into the vstrh.16
; addressing mode.
define i8* @strh16_m254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #-254]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -254
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %y
}
1419
; Full-width halfword store at offset -256: loads <8 x i16> from %x, stores
; it to %y-256, and returns %y.
; Expected output: the address is formed by a separate `sub.w r1, r0, #256`
; and the vstrh.16 uses no immediate.
; NOTE(review): presumably -256 is just past the most negative encodable
; halfword offset - confirm against the MVE addressing rules.
define i8* @strh16_m256(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #256
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -256
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret i8* %y
}
1435
1436
; Byte store from 32-bit lanes: loads <4 x i8> from %x, stores it to %y+4
; (align 1), and returns %y.
; Expected output: the data is loaded with vldrb.u32 and stored with vstrb.32,
; with the +4 byte offset folded into the store's addressing mode.
define i8* @strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %y
}
1451
; Byte store (32-bit lanes) with an odd offset: loads <4 x i8> from %x,
; stores it to %y+3, and returns %y.
; Expected output: the #3 offset is folded into the vstrb.32 addressing mode;
; no separate address computation appears.
define i8* @strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #3]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %y
}
1466
; Byte store (32-bit lanes) at offset +127: loads <4 x i8> from %x, stores it
; to %y+127, and returns %y.
; Expected output: the #127 offset is still folded into the vstrb.32
; addressing mode.
define i8* @strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %y
}
1481
; Byte store (32-bit lanes) at offset +128: loads <4 x i8> from %x, stores it
; to %y+128, and returns %y.
; Expected output: the address is formed by a separate `add.w r1, r0, #128`
; and the vstrb.32 uses no immediate.
; NOTE(review): presumably 128 is just past the largest encodable byte
; offset - confirm against the MVE addressing rules.
define i8* @strb32_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #128
; CHECK-NEXT:    vstrb.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %y
}
1497
; Byte store (32-bit lanes) at offset -127: loads <4 x i8> from %x, stores it
; to %y-127, and returns %y.
; Expected output: the #-127 offset is still folded into the vstrb.32
; addressing mode.
define i8* @strb32_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #-127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -127
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %y
}
1512
; Byte store (32-bit lanes) at offset -128: loads <4 x i8> from %x, stores it
; to %y-128, and returns %y.
; Expected output: the address is formed by a separate `sub.w r1, r0, #128`
; and the vstrb.32 uses no immediate.
; NOTE(review): presumably -128 is just past the most negative encodable byte
; offset - confirm against the MVE addressing rules.
define i8* @strb32_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #128
; CHECK-NEXT:    vstrb.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -128
  %0 = bitcast i8* %x to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = bitcast i8* %z to <4 x i8>*
  store <4 x i8> %1, <4 x i8>* %2, align 1
  ret i8* %y
}
1528
1529
; Byte store from 16-bit lanes: loads <8 x i8> from %x, stores it to %y+4
; (align 1), and returns %y.
; Expected output: the data is loaded with vldrb.u16 and stored with vstrb.16,
; with the +4 byte offset folded into the store's addressing mode.
define i8* @strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %y
}
1544
; Byte store (16-bit lanes) with an odd offset: loads <8 x i8> from %x,
; stores it to %y+3, and returns %y.
; Expected output: the #3 offset is folded into the vstrb.16 addressing mode;
; no separate address computation appears.
define i8* @strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #3]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %y
}
1559
; Byte store (16-bit lanes) at offset +127: loads <8 x i8> from %x, stores it
; to %y+127, and returns %y.
; Expected output: the #127 offset is still folded into the vstrb.16
; addressing mode.
define i8* @strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %y
}
1574
; Byte store (16-bit lanes) at offset +128: loads <8 x i8> from %x, stores it
; to %y+128, and returns %y.
; Expected output: the address is formed by a separate `add.w r1, r0, #128`
; and the vstrb.16 uses no immediate.
; NOTE(review): presumably 128 is just past the largest encodable byte
; offset - confirm against the MVE addressing rules.
define i8* @strb16_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #128
; CHECK-NEXT:    vstrb.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %y
}
1590
; Byte store (16-bit lanes) at offset -127: loads <8 x i8> from %x, stores it
; to %y-127, and returns %y.
; Expected output: the #-127 offset is still folded into the vstrb.16
; addressing mode.
define i8* @strb16_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #-127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -127
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %y
}
1605
; Byte store (16-bit lanes) at offset -128: loads <8 x i8> from %x, stores it
; to %y-128, and returns %y.
; Expected output: the address is formed by a separate `sub.w r1, r0, #128`
; and the vstrb.16 uses no immediate.
; NOTE(review): presumably -128 is just past the most negative encodable byte
; offset - confirm against the MVE addressing rules.
define i8* @strb16_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #128
; CHECK-NEXT:    vstrb.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -128
  %0 = bitcast i8* %x to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = bitcast i8* %z to <8 x i8>*
  store <8 x i8> %1, <8 x i8>* %2, align 1
  ret i8* %y
}
1621
1622
; Full-width byte store: loads <16 x i8> from %x, stores it to %y+4
; (align 1), and returns %y.
; Expected output: the +4 byte offset is folded into the vstrb.8 addressing
; mode.
define i8* @strb8_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %y
}
1637
; Full-width byte store with an odd offset: loads <16 x i8> from %x, stores
; it to %y+3, and returns %y.
; Expected output: the #3 offset is folded into the vstrb.8 addressing mode;
; no separate address computation appears.
define i8* @strb8_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %y
}
1652
; Full-width byte store at offset +127: loads <16 x i8> from %x, stores it to
; %y+127, and returns %y.
; Expected output: the #127 offset is still folded into the vstrb.8
; addressing mode.
define i8* @strb8_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 127
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %y
}
1667
; Full-width byte store at offset +128: loads <16 x i8> from %x, stores it to
; %y+128, and returns %y.
; Expected output: the address is formed by a separate `add.w r1, r0, #128`
; and the vstrb.8 uses no immediate.
; NOTE(review): presumably 128 is just past the largest encodable byte
; offset - confirm against the MVE addressing rules.
define i8* @strb8_128(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #128
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 128
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %y
}
1683
; Full-width byte store at offset -127: loads <16 x i8> from %x, stores it to
; %y-127, and returns %y.
; Expected output: the #-127 offset is still folded into the vstrb.8
; addressing mode.
define i8* @strb8_m127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #-127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -127
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %y
}
1698
; Full-width byte store at offset -128: loads <16 x i8> from %x, stores it to
; %y-128, and returns %y.
; Expected output: the address is formed by a separate `sub.w r1, r0, #128`
; and the vstrb.8 uses no immediate.
; NOTE(review): presumably -128 is just past the most negative encodable byte
; offset - confirm against the MVE addressing rules.
define i8* @strb8_m128(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #128
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 -128
  %0 = bitcast i8* %x to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %z to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret i8* %y
}
1714
1715
; Float variant of the word store: loads <4 x float> from %x, stores it to
; %y+4 (align 4), and returns %y.
; Expected output: the +4 byte offset is folded into the vstrw.32 addressing
; mode.
define i8* @strf32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  %2 = bitcast i8* %z to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %y
}
1730
; Half-precision variant of the halfword store: loads <8 x half> from %x,
; stores it to %y+4 (align 2), and returns %y.
; Expected output: the +4 byte offset is folded into the vstrh.16 addressing
; mode.
define i8* @strf16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 4
  %0 = bitcast i8* %x to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast i8* %z to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret i8* %y
}
1745
; Under-aligned word store: loads <4 x i32> from %x (align 4), stores it to
; %y+3 with align 1, and returns %y.
; Expected output: both runs perform the store as a byte-wide vstrb.8 with the
; #3 offset folded in; the big-endian run additionally emits vrev32.8 between
; the word load and the byte store.
define i8* @strwi32_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strwi32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strwi32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 1
  ret i8* %y
}
1767
; Under-aligned halfword store: loads <8 x i16> from %x (align 2), stores it
; to %y+3 with align 1, and returns %y.
; Expected output: both runs perform the store as a byte-wide vstrb.8 with the
; #3 offset folded in; the big-endian run additionally emits vrev16.8 between
; the halfword load and the byte store.
define i8* @strhi16_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strhi16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strhi16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 1
  ret i8* %y
}
1789
; Under-aligned truncating store: loads <4 x i32> from %x (align 4), truncates
; it to <4 x i16>, stores the result to %y+3 with align 1, and returns %y.
; Expected output (same for both endiannesses): the narrowing vstrh.32 writes
; into an 8-byte stack slot, and the 8 bytes are then copied to the
; destination with ldrd plus two scalar str.w stores (offsets #3 and #7).
define i8* @strhi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    ldrd r1, r2, [sp]
; CHECK-NEXT:    str.w r1, [r0, #3]
; CHECK-NEXT:    str.w r2, [r0, #7]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i8* %z to <4 x i16>*
  %3 = trunc <4 x i32> %1 to <4 x i16>
  store <4 x i16> %3, <4 x i16>* %2, align 1
  ret i8* %y
}
1812
; Under-aligned float store: loads <4 x float> from %x (align 4), stores it to
; %y+3 with align 1, and returns %y.
; Expected output: both runs perform the store as a byte-wide vstrb.8 with the
; #3 offset folded in; the big-endian run additionally emits vrev32.8 between
; the word load and the byte store.
define i8* @strf32_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  %2 = bitcast i8* %z to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 1
  ret i8* %y
}
1834
; Under-aligned half-precision store: loads <8 x half> from %x (align 2),
; stores it to %y+3 with align 1, and returns %y.
; Expected output: both runs perform the store as a byte-wide vstrb.8 with the
; #3 offset folded in; the big-endian run additionally emits vrev16.8 between
; the halfword load and the byte store.
define i8* @strf16_align1(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 3
  %0 = bitcast i8* %x to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast i8* %z to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 1
  ret i8* %y
}
1856
; Over-aligned halfword store: loads <8 x i16> from %x (align 2), stores it to
; %y+16 with align 8, and returns %y.
; Despite the "f16" in the name, the IR below operates on <8 x i16>, not on a
; half-precision vector.
; Expected output: the little-endian run performs the store as vstrw.32 while
; the big-endian run keeps vstrh.16; both fold the #16 offset and neither
; emits a lane reversal.
; NOTE(review): presumably the extra alignment is what allows the wider store
; on little-endian - confirm against the backend's store selection.
define i8* @strf16_align8(i8* %y, i8* %x) {
; CHECK-LE-LABEL: strf16_align8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrw.32 q0, [r0, #16]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_align8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #16]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %y, i32 16
  %0 = bitcast i8* %x to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i8* %z to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 8
  ret i8* %y
}
1877