1# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
2
3# The tests below test the allocation of 128-bit callee-saves
4# on the stack, specifically their offsets.
5
6# Padding of GPR64-registers is needed to ensure 16 byte alignment of
7# the stack pointer after the GPR64/FPR64 block (which is also needed
8# for the FPR128 saves when present).
9
10# This file also tests whether an emergency stack slot is allocated
11# when the stack frame is over a given size, caused by a series of
12# FPR128 saves. The alignment can leave a gap that may be reused by
13# stack slot scavenging, so it is important that the stack size
14# is properly estimated.
15
16
17--- |
18
  ; Embedded LLVM IR module. It only provides placeholder definitions:
  ; every function uses the aarch64_vector_pcs calling convention, is
  ; marked nounwind, and consists of a single 'unreachable' entry block.
  ; The function names match the 'name:' fields of the MIR documents
  ; that follow in this file.
19  ; ModuleID = '<stdin>'
20  source_filename = "<stdin>"
21  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
22  target triple = "aarch64-unknown-linux-gnu"
23
24  ; Function Attrs: nounwind
25  define aarch64_vector_pcs void @test_q10_q11_x19() nounwind { entry: unreachable }
26
27  ; Function Attrs: nounwind
28  define aarch64_vector_pcs void @test_q10_q11_x19_x20() nounwind { entry: unreachable }
29
30  ; Function Attrs: nounwind
31  define aarch64_vector_pcs void @test_q10_q11_x19_x20_x21() nounwind { entry: unreachable }
32
33  ; Function Attrs: nounwind
34  define aarch64_vector_pcs void @test_q8_to_q23_x19_to_x30() nounwind { entry: unreachable }
35
36  ; Function Attrs: nounwind
37  define aarch64_vector_pcs void @test_q8_to_q23_x19_to_x30_preinc() nounwind { entry: unreachable }
38
39...
40---
41name:            test_q10_q11_x19
42tracksRegLiveness: true
43body:             |
44  bb.0.entry:
  ; Define the registers whose callee-save spills are checked below:
  ; one 64-bit GPR (x19) and two 128-bit vector registers (q10, q11).
45  $x19 = IMPLICIT_DEF
46  $q10 = IMPLICIT_DEF
47  $q11 = IMPLICIT_DEF
48
49  ; Check that the alignment gap for the 8-byte x19 is padded
50  ; with another 8 bytes. The CSR region will look like this:
51  ;    +-------------------+
52  ;    |/////padding///////|        (8 bytes)
53  ;    |       X19         |        (8 bytes)
54  ;    +-------------------+ <-  SP -16
55  ;    |     Q10, Q11      |        (32 bytes)
56  ;    +-------------------+ <-  SP -48
57
  ; How the expected immediates map onto the diagram:
  ;  - STPQpre -3 is in 16-byte units, i.e. SP is lowered by 48 bytes.
  ;  - STRXui 4 is in 8-byte units, i.e. x19 is stored 32 bytes above
  ;    the new SP, which is the original SP - 16.
  ; The x19 slot is expected to report alignment 16 even though its
  ; size is 8, which is how the padding shows up in the frame info.
  ; The trailing {{[[:space:]]}} in the label keeps it from matching
  ; the longer test names that share this prefix.
58  ; CHECK-LABEL: test_q10_q11_x19{{[[:space:]]}}
59  ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]])
60  ; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -48, size: 16, alignment: 16
61  ; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -32, size: 16, alignment: 16
62  ; CHECK-DAG: frame-setup STRXui killed $x19, $sp, 4 :: (store 8 into %stack.[[X19:[0-9]+]])
63  ; CHECK-DAG: - { id: [[X19]], {{.*}}, offset: -16, size: 8, alignment: 16
64
65...
66---
67name:            test_q10_q11_x19_x20
68alignment:       4
69tracksRegLiveness: true
70body:             |
71  bb.0.entry:
  ; Define the registers whose callee-save spills are checked below:
  ; two 64-bit GPRs (x19, x20) and two 128-bit vector registers.
72  $x19 = IMPLICIT_DEF
73  $x20 = IMPLICIT_DEF
74  $q10 = IMPLICIT_DEF
75  $q11 = IMPLICIT_DEF
76
  ; With two GPR64 saves the GPR block is exactly 16 bytes, so no
  ; padding is expected. The CSR region will look like this:
77  ;    +-------------------+
78  ;    |     X19, X20      |        (16 bytes)
79  ;    +-------------------+ <-  SP -16
80  ;    |     Q10, Q11      |        (32 bytes)
81  ;    +-------------------+ <-  SP -48
82
  ; STPQpre -3 is in 16-byte units (SP lowered by 48 bytes); STPXi 4 is
  ; in 8-byte units (32 bytes above the new SP, i.e. original SP - 16).
  ; Both GPR slots are expected to keep their natural 8-byte alignment.
83  ; CHECK-LABEL: test_q10_q11_x19_x20{{[[:space:]]}}
84  ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]])
85  ; CHECK-DAG: frame-setup STPXi killed $x20, killed $x19, $sp, 4 :: (store 8 into %stack.[[X20:[0-9]+]]), (store 8 into %stack.[[X19:[0-9]+]])
86  ; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -48, size: 16, alignment: 16
87  ; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -32, size: 16, alignment: 16
88  ; CHECK-DAG: - { id: [[X20]], {{.*}}, offset: -16, size: 8, alignment: 8
89  ; CHECK-DAG: - { id: [[X19]], {{.*}}, offset:  -8, size: 8, alignment: 8
90
91...
92---
93name:            test_q10_q11_x19_x20_x21
94tracksRegLiveness: true
95body:             |
96  bb.0.entry:
  ; Define the registers whose callee-save spills are checked below:
  ; three 64-bit GPRs (an odd count) and two 128-bit vector registers.
97  $x19 = IMPLICIT_DEF
98  $x20 = IMPLICIT_DEF
99  $x21 = IMPLICIT_DEF
100  $q10 = IMPLICIT_DEF
101  $q11 = IMPLICIT_DEF
102
103  ; Check that the alignment gap is padded with another 8 bytes.
104  ; The CSR region will look like this:
105  ;    +-------------------+
106  ;    |     X19, X20      |        (16 bytes)
107  ;    +-------------------+ <-  SP -16
108  ;    |/////padding///////|        (8 bytes)
109  ;    |        X21        |        (8 bytes)
110  ;    +-------------------+ <-  SP -32
111  ;    |     Q10, Q11      |        (32 bytes)
112  ;    +-------------------+ <-  SP -64
113
  ; How the expected immediates map onto the diagram:
  ;  - STPQpre -4 is in 16-byte units: SP is lowered by 64 bytes.
  ;  - STRXui 4 is in 8-byte units: x21 lands 32 bytes above the new
  ;    SP, i.e. at the original SP - 32.
  ;  - STPXi 6 is in 8-byte units: x20/x19 land 48 bytes above the new
  ;    SP, i.e. at the original SP - 16.
  ; Only the unpaired x21 slot is checked for the padded alignment (16).
114  ; CHECK-LABEL: test_q10_q11_x19_x20_x21
115  ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -4 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]])
116  ; CHECK-DAG: frame-setup STRXui killed $x21, $sp, 4 :: (store 8 into %stack.[[X21:[0-9]+]])
117  ; CHECK-DAG: frame-setup STPXi killed $x20, killed $x19, $sp, 6
118  ; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -64, size: 16, alignment: 16
119  ; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -48, size: 16, alignment: 16
120  ; CHECK-DAG: - { id: [[X21]], {{.*}}, offset: -32, size: 8, alignment: 16
121
122...
123---
124name:            test_q8_to_q23_x19_to_x30
125tracksRegLiveness: true
126body:             |
127  bb.0.entry:
  ; Define twelve 64-bit registers (x19..x28, fp, lr) and sixteen
  ; 128-bit registers (q8..q23) so that all of them show up as
  ; callee-save spills in the checked prologue.
128  $x19 = IMPLICIT_DEF
129  $x20 = IMPLICIT_DEF
130  $x21 = IMPLICIT_DEF
131  $x22 = IMPLICIT_DEF
132  $x23 = IMPLICIT_DEF
133  $x24 = IMPLICIT_DEF
134  $x25 = IMPLICIT_DEF
135  $x26 = IMPLICIT_DEF
136  $x27 = IMPLICIT_DEF
137  $x28 = IMPLICIT_DEF
138  $fp = IMPLICIT_DEF
139  $lr = IMPLICIT_DEF
140  $q8 = IMPLICIT_DEF
141  $q9 = IMPLICIT_DEF
142  $q10 = IMPLICIT_DEF
143  $q11 = IMPLICIT_DEF
144  $q12 = IMPLICIT_DEF
145  $q13 = IMPLICIT_DEF
146  $q14 = IMPLICIT_DEF
147  $q15 = IMPLICIT_DEF
148  $q16 = IMPLICIT_DEF
149  $q17 = IMPLICIT_DEF
150  $q18 = IMPLICIT_DEF
151  $q19 = IMPLICIT_DEF
152  $q20 = IMPLICIT_DEF
153  $q21 = IMPLICIT_DEF
154  $q22 = IMPLICIT_DEF
155  $q23 = IMPLICIT_DEF
156
157  ; Test with more callee saves, which triggers 'BigStack' in
158  ; AArch64FrameLowering which in turn causes an emergency spill
159  ; slot to be allocated. The emergency spill slot is allocated
160  ; as close as possible to SP, so at SP + 0.
161  ;    +-------------------+
162  ;    |     X19..X30      |        (96 bytes)
163  ;    +-------------------+ <-  SP -96
164  ;    |      Q8..Q23      |        (256 bytes)
165  ;    +-------------------+ <-  SP -352
166  ;    |   emergency slot  |        (16 bytes)
167  ;    +-------------------+ <-  SP -368
168
  ; The whole frame is expected to be allocated by a single SUBXri of
  ; 368 bytes (96 + 256 + 16). The first STPQi uses offset 1 (16-byte
  ; units), which leaves the 16 bytes at SP + 0 free for the emergency
  ; slot. The first STPXi uses offset 34 (8-byte units, 272 bytes),
  ; i.e. directly above the 16 + 256 bytes below it.
169  ; CHECK-LABEL: test_q8_to_q23_x19_to_x30
170  ; CHECK: $sp = frame-setup SUBXri $sp, 368, 0
171  ; CHECK-NEXT: frame-setup STPQi killed $q23, killed $q22, $sp, 1 :: (store 16 into %stack.{{[0-9]+}}), (store 16 into %stack.{{[0-9]+}})
172  ; CHECK-NEXT: frame-setup STPQi killed $q21, killed $q20, $sp, 3
173  ; CHECK-NEXT: frame-setup STPQi killed $q19, killed $q18, $sp, 5
174  ; CHECK-NEXT: frame-setup STPQi killed $q17, killed $q16, $sp, 7
175  ; CHECK-NEXT: frame-setup STPQi killed $q15, killed $q14, $sp, 9
176  ; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 11
177  ; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 13
178  ; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 15
179  ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 34 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}})
180  ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 36
181  ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 38
182  ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 40
183  ; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 42
184  ; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 44
185
186...
187---
188name:            test_q8_to_q23_x19_to_x30_preinc
189tracksRegLiveness: true
190stack:
191  - { id: 0, size: 160, alignment: 4, local-offset: 0 }
192constants:
193body:             |
194  bb.0.entry:
  ; Same set of callee-saved registers as the previous test; the only
  ; difference is the additional 160-byte stack object declared above.
195  $x19 = IMPLICIT_DEF
196  $x20 = IMPLICIT_DEF
197  $x21 = IMPLICIT_DEF
198  $x22 = IMPLICIT_DEF
199  $x23 = IMPLICIT_DEF
200  $x24 = IMPLICIT_DEF
201  $x25 = IMPLICIT_DEF
202  $x26 = IMPLICIT_DEF
203  $x27 = IMPLICIT_DEF
204  $x28 = IMPLICIT_DEF
205  $fp = IMPLICIT_DEF
206  $lr = IMPLICIT_DEF
207  $q8 = IMPLICIT_DEF
208  $q9 = IMPLICIT_DEF
209  $q10 = IMPLICIT_DEF
210  $q11 = IMPLICIT_DEF
211  $q12 = IMPLICIT_DEF
212  $q13 = IMPLICIT_DEF
213  $q14 = IMPLICIT_DEF
214  $q15 = IMPLICIT_DEF
215  $q16 = IMPLICIT_DEF
216  $q17 = IMPLICIT_DEF
217  $q18 = IMPLICIT_DEF
218  $q19 = IMPLICIT_DEF
219  $q20 = IMPLICIT_DEF
220  $q21 = IMPLICIT_DEF
221  $q22 = IMPLICIT_DEF
222  $q23 = IMPLICIT_DEF
223
224  ; When the total stack size >= 512, it will use the pre-increment
225  ; rather than the 'sub sp, sp, <size>'.
226  ;    +-------------------+
227  ;    |     X19..X30      |        (96 bytes)
228  ;    +-------------------+ <-  SP -96
229  ;    |      Q8..Q23      |        (256 bytes)
230  ;    +-------------------+ <-  SP -352
231  ;    |       'obj'       |        (160 bytes)
232  ;    +-------------------+ <-  SP -512
233  ;    |   emergency slot  |        (16 bytes)
234  ;    +-------------------+ <-  SP -528
235
  ; The callee-save area (352 bytes) is expected to be allocated by the
  ; pre-indexed STPQpre with offset -22 (16-byte units). The remaining
  ; 176 bytes (160-byte stack object + 16-byte emergency slot) are
  ; expected to be allocated by the trailing SUBXri, for 528 in total.
236  ; CHECK-LABEL: test_q8_to_q23_x19_to_x30_preinc
237  ; CHECK: $sp = frame-setup STPQpre killed $q23, killed $q22, $sp, -22 :: (store 16 into %stack.{{[0-9]+}}), (store 16 into %stack.{{[0-9]+}})
238  ; CHECK-NEXT: frame-setup STPQi killed $q21, killed $q20, $sp, 2
239  ; CHECK-NEXT: frame-setup STPQi killed $q19, killed $q18, $sp, 4
240  ; CHECK-NEXT: frame-setup STPQi killed $q17, killed $q16, $sp, 6
241  ; CHECK-NEXT: frame-setup STPQi killed $q15, killed $q14, $sp, 8
242  ; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 10
243  ; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 12
244  ; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 14
245  ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 32 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}})
246  ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 34
247  ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 36
248  ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 38
249  ; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 40
250  ; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 42
251  ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 176, 0
252
253...
254