; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s

; Two adjacent, 4-byte-aligned i32 loads from %p and %p+1 are expected to be
; merged into one ldp of two w registers. The label is matched with its colon
; and the ldp is matched with operands, because the symbol name itself contains
; the text "ldp" and would otherwise satisfy a bare mnemonic match.
; CHECK-LABEL: ldp_int:
; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0]
define i32 @ldp_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %add = add nsw i32 %tmp1, %tmp
  ret i32 %add
}

; Both adjacent i32 loads are sign-extended to i64, so the pair is expected to
; be emitted as a single sign-extending ldpsw.
; CHECK-LABEL: ldp_sext_int:
; CHECK: ldpsw
define i64 @ldp_sext_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Only the first loaded value (%tmp, at %p) is sign-extended; the second is
; zero-extended. The expected code is a plain ldp of w registers followed by an
; sxtw applied to the first destination register.
; CHECK-LABEL: ldp_half_sext_res0_int:
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw     x[[DST1]], w[[DST1]]
define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %zexttmp1 = zext i32 %tmp1 to i64
  %add = add nsw i64 %zexttmp1, %sexttmp
  ret i64 %add
}

; Only the second loaded value (%tmp1, at %p+1) is sign-extended; the first is
; zero-extended. The expected code is a plain ldp of w registers followed by an
; sxtw applied to the second destination register.
; CHECK-LABEL: ldp_half_sext_res1_int:
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw     x[[DST2]], w[[DST2]]
define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %zexttmp = zext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %zexttmp
  ret i64 %add
}


; Two adjacent, 8-byte-aligned i64 loads from %p and %p+1 are expected to be
; merged into one ldp of two x registers. Operands are matched because the
; symbol name contains "ldp" and would satisfy a bare mnemonic match.
; CHECK-LABEL: ldp_long:
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x0]
define i64 @ldp_long(i64* %p) nounwind {
  %tmp = load i64, i64* %p, align 8
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  %tmp1 = load i64, i64* %add.ptr, align 8
  %add = add nsw i64 %tmp1, %tmp
  ret i64 %add
}

; Two adjacent float loads from %p and %p+1 are expected to be merged into one
; ldp of two s registers. Operands are matched because the symbol name contains
; "ldp" and would satisfy a bare mnemonic match.
; CHECK-LABEL: ldp_float:
; CHECK: ldp s{{[0-9]+}}, s{{[0-9]+}}, [x0]
define float @ldp_float(float* %p) nounwind {
  %tmp = load float, float* %p, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  %tmp1 = load float, float* %add.ptr, align 4
  %add = fadd float %tmp, %tmp1
  ret float %add
}

; Two adjacent double loads from %p and %p+1 are expected to be merged into one
; ldp of two d registers. Operands are matched because the symbol name contains
; "ldp" and would satisfy a bare mnemonic match.
; CHECK-LABEL: ldp_double:
; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}, [x0]
define double @ldp_double(double* %p) nounwind {
  %tmp = load double, double* %p, align 8
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  %tmp1 = load double, double* %add.ptr, align 8
  %add = fadd double %tmp, %tmp1
  ret double %add
}

; Two adjacent <2 x double> loads from %p and %p+1 are expected to be merged
; into one ldp of two q registers. Operands are matched because the symbol name
; contains "ldp" and would satisfy a bare mnemonic match.
; CHECK-LABEL: ldp_doublex2:
; CHECK: ldp q{{[0-9]+}}, q{{[0-9]+}}, [x0]
define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind {
  %tmp = load <2 x double>, <2 x double>* %p, align 16
  %add.ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 1
  %tmp1 = load <2 x double>, <2 x double>* %add.ptr, align 16
  %add = fadd <2 x double> %tmp, %tmp1
  ret <2 x double> %add
}

; Test the load/store optimizer---combine ldurs into a ldp, if appropriate

; i32 loads at element indices -1 and -2 (byte offsets -4 and -8 from %a) are
; expected to become one ldp based at [x0, #-8], immediately followed by the add.
define i32 @ldur_int(i32* %a) nounwind {
; CHECK-LABEL: ldur_int:
; CHECK: ldp     [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
; CHECK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %tmp3 = add i32 %tmp1, %tmp2
  ret i32 %tmp3
}

; Same negative-offset pair as ldur_int, but both values are sign-extended to
; i64, so a single ldpsw at [x0, #-8] is expected.
define i64 @ldur_sext_int(i32* %a) nounwind {
; CHECK-LABEL: ldur_sext_int:
; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Only the value at the lower address (%tmp2, element -2) is sign-extended; the
; other is zero-extended. A plain ldp at [x0, #-8] is expected, with an sxtw on
; its first destination register before the add.
define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res0:
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; CHECK: sxtw     x[[DST1]], w[[DST1]]
; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %zexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %zexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Only the value at the higher address (%tmp1, element -1) is sign-extended; the
; other is zero-extended. A plain ldp at [x0, #-8] is expected, with an sxtw on
; its second destination register before the add.
define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res1:
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; CHECK: sxtw     x[[DST2]], w[[DST2]]
; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %zexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %zexttmp2
  ret i64 %tmp3
}


; i64 loads at element indices -1 and -2 (byte offsets -8 and -16 from %a) are
; expected to become one ldp of x registers based at [x0, #-16].
define i64 @ldur_long(i64* %a) nounwind ssp {
; CHECK-LABEL: ldur_long:
; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -1
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -2
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; float loads at element indices -1 and -2 (byte offsets -4 and -8 from %a) are
; expected to become one ldp of s registers based at [x0, #-8].
define float @ldur_float(float* %a) {
; CHECK-LABEL: ldur_float:
; CHECK: ldp     [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
; CHECK-NEXT: fadd    s{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds float, float* %a, i64 -1
  %tmp1 = load float, float* %p1, align 2
  %p2 = getelementptr inbounds float, float* %a, i64 -2
  %tmp2 = load float, float* %p2, align 2
  %tmp3 = fadd float %tmp1, %tmp2
  ret float %tmp3
}

; double loads at element indices -1 and -2 (byte offsets -8 and -16 from %a)
; are expected to become one ldp of d registers based at [x0, #-16].
define double @ldur_double(double* %a) {
; CHECK-LABEL: ldur_double:
; CHECK: ldp     [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
; CHECK-NEXT: fadd    d{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds double, double* %a, i64 -1
  %tmp1 = load double, double* %p1, align 2
  %p2 = getelementptr inbounds double, double* %a, i64 -2
  %tmp2 = load double, double* %p2, align 2
  %tmp3 = fadd double %tmp1, %tmp2
  ret double %tmp3
}

; <2 x double> loads at element indices -1 and -2 (byte offsets -16 and -32 from
; %a) are expected to become one ldp of q registers based at [x0, #-32].
define <2 x double> @ldur_doublex2(<2 x double>* %a) {
; CHECK-LABEL: ldur_doublex2:
; CHECK: ldp     q[[DST1:[0-9]+]], q[[DST2:[0-9]+]], [x0, #-32]
; CHECK-NEXT: fadd    v{{[0-9]+}}.2d, v[[DST2]].2d, v[[DST1]].2d
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -1
  %tmp1 = load <2 x double>, <2 x double>* %p1, align 2
  %p2 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -2
  %tmp2 = load <2 x double>, <2 x double>* %p2, align 2
  %tmp3 = fadd <2 x double> %tmp1, %tmp2
  ret <2 x double> %tmp3
}

; Now check some boundary conditions

; i64 loads at element indices -31 and -32 (byte offsets -248 and -256 from %a).
; The pair is still expected to be merged into one ldp at [x0, #-256], with no
; ldur left before it.
define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyIn:
; CHECK-NOT: ldur
; CHECK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -31
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 loads at element indices -63 and -64 (byte offsets -252 and -256 from %a),
; both sign-extended. The pair is still expected to be merged into one ldpsw at
; [x0, #-256], with no ldur left before it.
define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInSext:
; CHECK-NOT: ldur
; CHECK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; CHECK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Boundary case at byte offsets -252/-256 where only the lower-address value
; (%tmp2, element -64) is sign-extended. A plain ldp at [x0, #-256] is expected,
; followed by an sxtw on its first destination register.
define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes0:
; CHECK-NOT: ldur
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; CHECK: sxtw     x[[DST1]], w[[DST1]]
; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %zexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %zexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Boundary case at byte offsets -252/-256 where only the higher-address value
; (%tmp1, element -63) is sign-extended. A plain ldp at [x0, #-256] is expected,
; followed by an sxtw on its second destination register.
define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes1:
; CHECK-NOT: ldur
; CHECK: ldp     w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; CHECK: sxtw     x[[DST2]], w[[DST2]]
; CHECK-NEXT: add     x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %zexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %zexttmp2
  ret i64 %tmp3
}

; i64 loads at element indices -32 and -33 (byte offsets -256 and -264 from %a),
; one element below the pair used in pairUpBarelyIn. The test expects that no
; ldp is formed for this pair.
define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyOut:
; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; CHECK: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -33
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 loads at element indices -64 and -65 (byte offsets -256 and -260 from %a),
; both sign-extended; one element below the pair used in pairUpBarelyInSext.
; The test expects that no ldp/ldpsw is formed for this pair.
define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyOutSext:
; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; CHECK: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -65
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Each i64 address is an element address (-18 and -17) bumped by one byte, giving
; byte offsets -143 and -135 from %a, which are not multiples of the 8-byte
; element size. The test expects two separate ldur instructions and no ldp.
define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAligned:
; CHECK-NOT: ldp
; CHECK: ldur
; CHECK-NEXT: ldur
; CHECK-NEXT: add
; CHECK-NEXT: ret
  ; First load: &a[-18] plus one byte.
  %p1 = getelementptr inbounds i64, i64* %a, i64 -18
  %bp1 = bitcast i64* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i64*
  %tmp1 = load i64, i64* %dp1, align 1

  ; Second load: &a[-17] plus one byte.
  %p2 = getelementptr inbounds i64, i64* %a, i64 -17
  %bp2 = bitcast i64* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i64*
  %tmp2 = load i64, i64* %dp2, align 1

  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; Each i32 address is an element address (-18 and -17) bumped by one byte, giving
; byte offsets -71 and -67 from %a, which are not multiples of the 4-byte element
; size. Both values are sign-extended; the test expects two separate ldursw
; instructions and no ldp/ldpsw.
define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAlignedSext:
; CHECK-NOT: ldp
; CHECK: ldursw
; CHECK-NEXT: ldursw
; CHECK-NEXT: add
; CHECK-NEXT: ret
  ; First load: &a[-18] plus one byte.
  %p1 = getelementptr inbounds i32, i32* %a, i64 -18
  %bp1 = bitcast i32* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i32*
  %tmp1 = load i32, i32* %dp1, align 1

  ; Second load: &a[-17] plus one byte.
  %p2 = getelementptr inbounds i32, i32* %a, i64 -17
  %bp2 = bitcast i32* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i32*
  %tmp2 = load i32, i32* %dp2, align 1

  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; External function that receives a computed pointer; called by
; ldp_sext_int_pre and ldp_sext_int_post so the pointer value has a real use.
declare void @use-ptr(i32*)

; %p+2 is computed and passed to @use-ptr before the two sign-extended loads at
; that pointer and the following element. The expected code is a single ldpsw
; using an immediate offset of #8 from some base register.
; CHECK-LABEL: ldp_sext_int_pre:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
define i64 @ldp_sext_int_pre(i32* %p) nounwind {
  %ptr = getelementptr inbounds i32, i32* %p, i64 2
  call void @use-ptr(i32* %ptr)
  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
  %tmp = load i32, i32* %add.ptr, align 4
  %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
  %tmp1 = load i32, i32* %add.ptr1, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Two sign-extended loads at %p and %p+1 are followed by a call that takes %p+2.
; The expected code is a post-indexed ldpsw that loads the pair from [x0] and
; then advances x0 by 8 bytes.
; CHECK-LABEL: ldp_sext_int_post:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
define i64 @ldp_sext_int_post(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
  call void @use-ptr(i32* %ptr)
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

