1; Test the handling of base + index + 12-bit displacement addresses for
2; large frames, in cases where no 20-bit form exists.  The tests here
3; assume z10 register pressure, without the high words being available.
4;
5; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
6; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
7; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \
8; RUN:   FileCheck -check-prefix=CHECK-FP %s
9
10declare void @foo(float *%ptr1, float *%ptr2)
11
12; This file tests what happens when a displacement is converted from
13; being relative to the start of a frame object to being relative to
14; the frame itself.  In some cases the test is only possible if two
15; objects are allocated.
16;
17; Rather than rely on a particular order for those objects, the tests
18; instead allocate two objects of the same size and apply the test to
19; both of them.  For consistency, all tests follow this model, even if
20; one object would actually be enough.
21
22; First check the highest in-range offset after conversion, which is 4092
23; for word-addressing instructions like LDEB.
24;
25; The last in-range doubleword offset is 4088.  Since the frame has two
26; emergency spill slots at 160(%r15), the amount that we need to allocate
27; in order to put another object at offset 4088 is (4088 - 176) / 4 = 978
28; words.
define void @f1(double *%dst) {
; CHECK-NOFP-LABEL: f1:
; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f1:
; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects, both handed to @foo.
  %obj_a = alloca [978 x float], align 8
  %obj_b = alloca [978 x float], align 8
  %base_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 1 of each object, widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 1
  %slot_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 1
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
52
53; Test the first out-of-range offset.
define void @f2(double *%dst) {
; CHECK-NOFP-LABEL: f2:
; CHECK-NOFP: lghi %r1, 4096
; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f2:
; CHECK-FP: lghi %r1, 4096
; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects, both handed to @foo.
  %obj_a = alloca [978 x float], align 8
  %obj_b = alloca [978 x float], align 8
  %base_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 2 of each object, widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 2
  %slot_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 2
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
79
80; Test the next offset after that.
define void @f3(double *%dst) {
; CHECK-NOFP-LABEL: f3:
; CHECK-NOFP: lghi %r1, 4096
; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f3:
; CHECK-FP: lghi %r1, 4096
; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects, both handed to @foo.
  %obj_a = alloca [978 x float], align 8
  %obj_b = alloca [978 x float], align 8
  %base_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 3 of each object, widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 3
  %slot_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 3
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
106
107; Add 4096 bytes (1024 words) to the size of each object and repeat.
define void @f4(double *%dst) {
; CHECK-NOFP-LABEL: f4:
; CHECK-NOFP: lghi %r1, 4096
; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f4:
; CHECK-FP: lghi %r1, 4096
; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects (2002 words each), both handed to @foo.
  %obj_a = alloca [2002 x float], align 8
  %obj_b = alloca [2002 x float], align 8
  %base_a = getelementptr inbounds [2002 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [2002 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 1 of each object, widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [2002 x float]* %obj_a, i64 0, i64 1
  %slot_b = getelementptr inbounds [2002 x float]* %obj_b, i64 0, i64 1
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
133
134; ...as above.
define void @f5(double *%dst) {
; CHECK-NOFP-LABEL: f5:
; CHECK-NOFP: lghi %r1, 8192
; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f5:
; CHECK-FP: lghi %r1, 8192
; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects (2002 words each), both handed to @foo.
  %obj_a = alloca [2002 x float], align 8
  %obj_b = alloca [2002 x float], align 8
  %base_a = getelementptr inbounds [2002 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [2002 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 2 of each object, widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [2002 x float]* %obj_a, i64 0, i64 2
  %slot_b = getelementptr inbounds [2002 x float]* %obj_b, i64 0, i64 2
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
160
161; ...as above.
define void @f6(double *%dst) {
; CHECK-NOFP-LABEL: f6:
; CHECK-NOFP: lghi %r1, 8192
; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f6:
; CHECK-FP: lghi %r1, 8192
; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects (2002 words each), both handed to @foo.
  %obj_a = alloca [2002 x float], align 8
  %obj_b = alloca [2002 x float], align 8
  %base_a = getelementptr inbounds [2002 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [2002 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 3 of each object, widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [2002 x float]* %obj_a, i64 0, i64 3
  %slot_b = getelementptr inbounds [2002 x float]* %obj_b, i64 0, i64 3
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
187
; Now try an offset of 4092 from the start of the object, with the object
; being at offset 8192.  This time we need objects of (8192 - 176) / 4 = 2004
; words.
define void @f7(double *%dst) {
; CHECK-NOFP-LABEL: f7:
; CHECK-NOFP: lghi %r1, 8192
; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f7:
; CHECK-FP: lghi %r1, 8192
; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects (2004 words each), both handed to @foo.
  %obj_a = alloca [2004 x float], align 8
  %obj_b = alloca [2004 x float], align 8
  %base_a = getelementptr inbounds [2004 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [2004 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 1023 of each object (object-relative byte offset 4092),
  ; widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [2004 x float]* %obj_a, i64 0, i64 1023
  %slot_b = getelementptr inbounds [2004 x float]* %obj_b, i64 0, i64 1023
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
216
217; Keep the object-relative offset the same but bump the size of the
218; objects by one doubleword.
define void @f8(double *%dst) {
; CHECK-NOFP-LABEL: f8:
; CHECK-NOFP: lghi %r1, 12288
; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f8:
; CHECK-FP: lghi %r1, 12288
; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects (2006 words each), both handed to @foo.
  %obj_a = alloca [2006 x float], align 8
  %obj_b = alloca [2006 x float], align 8
  %base_a = getelementptr inbounds [2006 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [2006 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 1023 of each object (object-relative byte offset 4092),
  ; widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [2006 x float]* %obj_a, i64 0, i64 1023
  %slot_b = getelementptr inbounds [2006 x float]* %obj_b, i64 0, i64 1023
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
244
245; Check a case where the original displacement is out of range.  The backend
246; should force an LAY from the outset.  We don't yet do any kind of anchor
247; optimization, so there should be no offset on the LDEB itself.
define void @f9(double *%dst) {
; CHECK-NOFP-LABEL: f9:
; CHECK-NOFP: lay %r1, 12296(%r15)
; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f9:
; CHECK-FP: lay %r1, 12296(%r11)
; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1)
; CHECK-FP: br %r14
  ; Two equally sized frame objects (2006 words each), both handed to @foo.
  %obj_a = alloca [2006 x float], align 8
  %obj_b = alloca [2006 x float], align 8
  %base_a = getelementptr inbounds [2006 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [2006 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Read element 1024 of each object (object-relative byte offset 4096),
  ; widen it to double and store via %dst.
  %slot_a = getelementptr inbounds [2006 x float]* %obj_a, i64 0, i64 1024
  %slot_b = getelementptr inbounds [2006 x float]* %obj_b, i64 0, i64 1024
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
273
274; Repeat f2 in a case that needs the emergency spill slots, because all
275; call-clobbered and allocated call-saved registers are live.  Note that
276; %vptr and %dst are copied to call-saved registers, freeing up %r2 and
277; %r3 during the main test.
define void @f10(i32 *%vptr, double *%dst) {
; CHECK-NOFP-LABEL: f10:
; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
; CHECK-NOFP: lghi [[REGISTER]], 4096
; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r15)
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f10:
; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
; CHECK-FP: lghi [[REGISTER]], 4096
; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r11)
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: br %r14
  ; Two equally sized frame objects, both handed to @foo.
  %obj_a = alloca [978 x float], align 8
  %obj_b = alloca [978 x float], align 8
  %base_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  %slot_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 2
  %slot_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 2
  ; Seven volatile loads whose results are only consumed after the float
  ; loads below, so all of them are live across the main test.
  %held0 = load volatile i32* %vptr
  %held1 = load volatile i32* %vptr
  %held2 = load volatile i32* %vptr
  %held3 = load volatile i32* %vptr
  %held4 = load volatile i32* %vptr
  %held5 = load volatile i32* %vptr
  %held6 = load volatile i32* %vptr
  ; The loads under test: element 2 of each object, widened to double.
  %narrow_a = load float* %slot_a
  %narrow_b = load float* %slot_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ; Consume the held integers, in the order in which they were loaded.
  store volatile i32 %held0, i32* %vptr
  store volatile i32 %held1, i32* %vptr
  store volatile i32 %held2, i32* %vptr
  store volatile i32 %held3, i32* %vptr
  store volatile i32 %held4, i32* %vptr
  store volatile i32 %held5, i32* %vptr
  store volatile i32 %held6, i32* %vptr
  ret void
}
321
322; Repeat f2 in a case where the index register is already occupied.
define void @f11(double *%dst, i64 %index) {
; CHECK-NOFP-LABEL: f11:
; CHECK-NOFP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3
; CHECK-NOFP: lay %r1, 4096(%r15)
; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1)
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f11:
; CHECK-FP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3
; CHECK-FP: lay %r1, 4096(%r11)
; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1)
; CHECK-FP: br %r14
  ; Two equally sized frame objects, both handed to @foo.
  %obj_a = alloca [978 x float], align 8
  %obj_b = alloca [978 x float], align 8
  %base_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 0
  %base_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 0
  call void @foo(float* %base_a, float* %base_b)
  ; Form the address of element 2 of each object plus %index bytes, using
  ; integer arithmetic on the pointer values.
  %slot_a = getelementptr inbounds [978 x float]* %obj_a, i64 0, i64 2
  %slot_b = getelementptr inbounds [978 x float]* %obj_b, i64 0, i64 2
  %int_a = ptrtoint float* %slot_a to i64
  %int_b = ptrtoint float* %slot_b to i64
  %sum_a = add i64 %int_a, %index
  %sum_b = add i64 %int_b, %index
  %indexed_a = inttoptr i64 %sum_a to float*
  %indexed_b = inttoptr i64 %sum_b to float*
  ; Load through the indexed addresses, widen to double and store via %dst.
  %narrow_a = load float* %indexed_a
  %narrow_b = load float* %indexed_b
  %wide_a = fpext float %narrow_a to double
  %wide_b = fpext float %narrow_b to double
  store volatile double %wide_a, double* %dst
  store volatile double %wide_b, double* %dst
  ret void
}
356