; Test 32-bit addition in which the second operand is variable.
;
; Every function below performs the addition through
; @llvm.uadd.with.overflow.i32 and consumes the overflow bit, either as a
; return value or as a branch condition.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; External callee used as the conditional tail-call target in f2/f3 and as the
; call that values must stay live across in f14.
declare i32 @foo()

; Check ALR.
;
; Adds %a and %b, stores the 32-bit sum through %res and returns the overflow
; bit zero-extended.  The expected output is a register-register alr on
; %r3/%r4, a store of %r3 to 0(%r5), and an ipm/risbg pair that leaves the
; returned flag in %r2.  The store and the flag extraction are matched in
; either order.
; %dummy is unused; it appears to exist so that %a, %b and %res are the second
; to fourth arguments, matching the %r3, %r4 and %r5 operands expected below.
define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f1:
; CHECK: alr %r3, %r4
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check using the overflow result for a branch.
;
; Adds %a and %b and stores the sum through %res.  When the overflow bit is
; set, control goes to a block that tail-calls @foo; otherwise the function
; just returns.  The expected output places a single conditional jgnle to
; foo@PLT after the store, followed by the ordinary return.
define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f2:
; CHECK: alr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgnle foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  br i1 %obit, label %call, label %exit

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction.
;
; Identical to the overflow-branch case except that the successors of the
; conditional branch are swapped: @foo is tail-called when the overflow bit is
; clear.  The expected conditional jump is therefore jgle instead of jgnle.
define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f3:
; CHECK: alr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgle foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  br i1 %obit, label %exit, label %call

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; Check the low end of the AL range.
;
; The second operand is loaded directly from %src (no offset), so the expected
; output folds the load into a memory-form al with displacement 0 off %r4.
; The sum is stored through %res and the overflow bit is returned in %r2.
define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f4:
; CHECK: al %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %b = load i32, i32 *%src
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned AL range.
;
; The second operand is element 1023 of %src, i.e. byte offset 1023 * 4 = 4092.
; The expected output still uses al, with that offset as the displacement.
define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f5:
; CHECK: al %r3, 4092(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 1023
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which should use ALY instead of AL.
;
; The second operand is element 1024 of %src, i.e. byte offset 1024 * 4 = 4096,
; one word past the largest offset for which al is expected.  The expected
; output switches to aly with displacement 4096.
define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f6:
; CHECK: aly %r3, 4096(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 1024
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned ALY range.
;
; The second operand is element 131071 of %src, i.e. byte offset
; 131071 * 4 = 524284.  The expected output is aly with that displacement.
define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f7:
; CHECK: aly %r3, 524284(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131071
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; The second operand is element 131072 of %src, i.e. byte offset
; 131072 * 4 = 524288, one word past the largest offset for which aly is
; expected.  The expected output first adds the offset to %r4 with agfi and
; then uses al with displacement 0.
define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f8:
; CHECK: agfi %r4, 524288
; CHECK: al %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the negative aligned ALY range.
;
; The second operand is element -1 of %src, i.e. byte offset -4.  The expected
; output is aly with displacement -4.
define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f9:
; CHECK: aly %r3, -4(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -1
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the low end of the ALY range.
;
; The second operand is element -131072 of %src, i.e. byte offset
; -131072 * 4 = -524288.  The expected output is aly with that displacement.
define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f10:
; CHECK: aly %r3, -524288(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
;
; The second operand is element -131073 of %src, i.e. byte offset
; -131073 * 4 = -524292, one word below the smallest offset for which aly is
; expected.  The expected output first adjusts %r4 with agfi and then uses al
; with displacement 0.
define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f11:
; CHECK: agfi %r4, -524292
; CHECK: al %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131073
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that AL allows an index.
;
; The load address is built with integer arithmetic as %src + %index + 4092
; and converted with inttoptr.  The expected output folds all three components
; into one al operand: displacement 4092 plus %r2 and %r3, accepted in either
; base/index order.  Here %a is the third argument, so the sum is expected in
; %r4 rather than %r3.
define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f12:
; CHECK: al %r4, 4092({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4092
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that ALY allows an index.
;
; The load address is built with integer arithmetic as %src + %index + 4096
; and converted with inttoptr.  The expected output folds all three components
; into one aly operand: displacement 4096 plus %r2 and %r3, accepted in either
; base/index order.  Here %a is the third argument, so the sum is expected in
; %r4 rather than %r3.
define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f13:
; CHECK: aly %r4, 4096({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that additions of spilled values can use AL rather than ALR.
;
; Ten i32 values are loaded from every second element of %ptr0 before the call
; to @foo, so all of them are live across the call.  After the call they are
; added one after another to the call's result, and the ten overflow bits are
; ORed together to form the return value.
; The expected output only requires that, somewhere after the brasl to
; foo@PLT, at least one addition reads its second operand straight from the
; stack frame with al, at offset 160 or 164 from %r15.
define zeroext i1 @f14(i32 *%ptr0) {
; CHECK-LABEL: f14:
; CHECK: brasl %r14, foo@PLT
; CHECK: al %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
  ; Addresses of elements 2, 4, ..., 18 of %ptr0.
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
  %ptr9 = getelementptr i32, i32 *%ptr0, i64 18

  ; All ten operands are loaded before the call and used only after it.
  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6
  %val7 = load i32, i32 *%ptr7
  %val8 = load i32, i32 *%ptr8
  %val9 = load i32, i32 *%ptr9

  %ret = call i32 @foo()

  ; Chain of overflow-checked additions seeded with the call result; each
  ; step feeds its sum into the next and accumulates its overflow bit.
  %t0 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %ret, i32 %val0)
  %add0 = extractvalue {i32, i1} %t0, 0
  %obit0 = extractvalue {i32, i1} %t0, 1
  %t1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add0, i32 %val1)
  %add1 = extractvalue {i32, i1} %t1, 0
  %obit1 = extractvalue {i32, i1} %t1, 1
  %res1 = or i1 %obit0, %obit1
  %t2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add1, i32 %val2)
  %add2 = extractvalue {i32, i1} %t2, 0
  %obit2 = extractvalue {i32, i1} %t2, 1
  %res2 = or i1 %res1, %obit2
  %t3 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add2, i32 %val3)
  %add3 = extractvalue {i32, i1} %t3, 0
  %obit3 = extractvalue {i32, i1} %t3, 1
  %res3 = or i1 %res2, %obit3
  %t4 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add3, i32 %val4)
  %add4 = extractvalue {i32, i1} %t4, 0
  %obit4 = extractvalue {i32, i1} %t4, 1
  %res4 = or i1 %res3, %obit4
  %t5 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add4, i32 %val5)
  %add5 = extractvalue {i32, i1} %t5, 0
  %obit5 = extractvalue {i32, i1} %t5, 1
  %res5 = or i1 %res4, %obit5
  %t6 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add5, i32 %val6)
  %add6 = extractvalue {i32, i1} %t6, 0
  %obit6 = extractvalue {i32, i1} %t6, 1
  %res6 = or i1 %res5, %obit6
  %t7 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add6, i32 %val7)
  %add7 = extractvalue {i32, i1} %t7, 0
  %obit7 = extractvalue {i32, i1} %t7, 1
  %res7 = or i1 %res6, %obit7
  %t8 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add7, i32 %val8)
  %add8 = extractvalue {i32, i1} %t8, 0
  %obit8 = extractvalue {i32, i1} %t8, 1
  %res8 = or i1 %res7, %obit8
  %t9 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add8, i32 %val9)
  %add9 = extractvalue {i32, i1} %t9, 0
  %obit9 = extractvalue {i32, i1} %t9, 1
  %res9 = or i1 %res8, %obit9

  ret i1 %res9
}

; Intrinsic called by every test function in this file.  It takes two i32
; operands and returns a {i32, i1} pair; the tests treat field 0 as the sum and
; field 1 as the overflow bit.
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone