1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s
3;
4; Test stack clash protection probing for static allocas.
5
6; Small: one probe.
7define i32 @fun0() #0 {
; Allocates 100 x i32 (400 bytes) statically. The expected output is a single
; 560-byte stack adjustment followed by exactly one probing compare (cg) at
; 552(%r15), then the volatile store and load, and a matching 560-byte release.
8; CHECK-LABEL: fun0:
9; CHECK:       # %bb.0:
10; CHECK-NEXT:    aghi %r15, -560
11; CHECK-NEXT:    .cfi_def_cfa_offset 720
12; CHECK-NEXT:    cg %r0, 552(%r15)
13; CHECK-NEXT:    mvhi 552(%r15), 1
14; CHECK-NEXT:    l %r2, 160(%r15)
15; CHECK-NEXT:    aghi %r15, 560
16; CHECK-NEXT:    br %r14
17
  ; The volatile store to element 98 and the volatile load of element 0 keep
  ; the buffer accesses visible in the output (mvhi and l above).
18  %a = alloca i32, i64 100
19  %b = getelementptr inbounds i32, i32* %a, i64 98
20  store volatile i32 1, i32* %b
21  %c = load volatile i32, i32* %a
22  ret i32 %c
23}
24
25; Medium: two probes.
26define i32 @fun1() #0 {
; Allocates 2000 x i32 (8000 bytes) statically. The expected output splits the
; allocation into two straight-line steps (4096 and 4080 bytes), each followed
; by its own probing compare (cg), and releases 8176 bytes in one step.
27; CHECK-LABEL: fun1:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    aghi %r15, -4096
30; CHECK-NEXT:    .cfi_def_cfa_offset 4256
31; CHECK-NEXT:    cg %r0, 4088(%r15)
32; CHECK-NEXT:    aghi %r15, -4080
33; CHECK-NEXT:    .cfi_def_cfa_offset 8336
34; CHECK-NEXT:    cg %r0, 4072(%r15)
35; CHECK-NEXT:    mvhi 976(%r15), 1
36; CHECK-NEXT:    l %r2, 176(%r15)
37; CHECK-NEXT:    aghi %r15, 8176
38; CHECK-NEXT:    br %r14
39
  ; Volatile store to element 200 and volatile load of element 0; these are
  ; the mvhi and l instructions expected above.
40  %a = alloca i32, i64 2000
41  %b = getelementptr inbounds i32, i32* %a, i64 200
42  store volatile i32 1, i32* %b
43  %c = load volatile i32, i32* %a
44  ret i32 %c
45}
46
47; Large: Use a loop to allocate and probe in steps.
48define i32 @fun2() #0 {
; Allocates 18000 x i32 (72000 bytes) statically. The expected output computes
; a loop bound in %r0 (%r15 - 69632, i.e. 17 x 4096), then runs a loop that
; lowers %r15 by 4096 and probes with cg on every iteration, branching back via
; clgrjh. After the loop a 2544-byte remainder is allocated and probed once
; more. The CFA is tracked through %r0 while the loop is modifying %r15.
49; CHECK-LABEL: fun2:
50; CHECK:       # %bb.0:
51; CHECK-NEXT:    lgr %r0, %r15
52; CHECK-NEXT:    .cfi_def_cfa_register %r0
53; CHECK-NEXT:    agfi %r0, -69632
54; CHECK-NEXT:    .cfi_def_cfa_offset 69792
55; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
56; CHECK-NEXT:    aghi %r15, -4096
57; CHECK-NEXT:    cg %r0, 4088(%r15)
58; CHECK-NEXT:    clgrjh %r15, %r0, .LBB2_1
59; CHECK-NEXT:  # %bb.2:
60; CHECK-NEXT:    .cfi_def_cfa_register %r15
61; CHECK-NEXT:    aghi %r15, -2544
62; CHECK-NEXT:    .cfi_def_cfa_offset 72336
63; CHECK-NEXT:    cg %r0, 2536(%r15)
64; CHECK-NEXT:    lhi %r0, 1
65; CHECK-NEXT:    mvhi 568(%r15), 1
66; CHECK-NEXT:    sty %r0, 28968(%r15)
67; CHECK-NEXT:    l %r2, 176(%r15)
68; CHECK-NEXT:    agfi %r15, 72176
69; CHECK-NEXT:    br %r14
70
  ; Two volatile stores (elements 98 and 7198) and one volatile load of
  ; element 0. The expected output uses mvhi for the first store and lhi + sty
  ; for the second, whose displacement (28968) is much larger.
71  %a = alloca i32, i64 18000
72  %b0 = getelementptr inbounds i32, i32* %a, i64 98
73  %b1 = getelementptr inbounds i32, i32* %a, i64 7198
74  store volatile i32 1, i32* %b0
75  store volatile i32 1, i32* %b1
76  %c = load volatile i32, i32* %a
77  ret i32 %c
78}
79
80; Ends evenly on the step so no remainder needed.
81define void @fun3() #0 {
; Allocates [7122 x i32] plus a single i32. The expected frame size is 28672
; bytes, an exact multiple (7 x) of the 4096-byte loop step, so the output
; contains only the probing loop and no separate remainder allocation or
; trailing probe after it.
82; CHECK-LABEL: fun3:
83; CHECK:       # %bb.0: # %entry
84; CHECK-NEXT:    lgr %r0, %r15
85; CHECK-NEXT:    .cfi_def_cfa_register %r0
86; CHECK-NEXT:    aghi %r0, -28672
87; CHECK-NEXT:    .cfi_def_cfa_offset 28832
88; CHECK-NEXT:  .LBB3_1: # %entry
89; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
90; CHECK-NEXT:    aghi %r15, -4096
91; CHECK-NEXT:    cg %r0, 4088(%r15)
92; CHECK-NEXT:    clgrjh %r15, %r0, .LBB3_1
93; CHECK-NEXT:  # %bb.2: # %entry
94; CHECK-NEXT:    .cfi_def_cfa_register %r15
95; CHECK-NEXT:    mvhi 180(%r15), 0
96; CHECK-NEXT:    l %r0, 180(%r15)
97; CHECK-NEXT:    aghi %r15, 28672
98; CHECK-NEXT:    br %r14
99entry:
  ; %stack is never read or written; it only contributes to the frame size.
  ; The two bitcast results below are not used by any later instruction.
100  %stack = alloca [7122 x i32], align 4
101  %i = alloca i32, align 4
102  %0 = bitcast [7122 x i32]* %stack to i8*
103  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  ; Volatile store and load of %i are the only memory accesses expected after
  ; the probing loop (mvhi and l at 180(%r15)).
104  store volatile i32 0, i32* %i, align 4
105  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
106  ret void
107}
108
109; Loop with bigger step.
110define void @fun4() #0 "stack-probe-size"="8192" {
; Same shape as a looped allocation, but with "stack-probe-size"="8192". The
; expected loop lowers %r15 by 8192 per iteration up to a bound of 24576
; (3 x 8192), then allocates and probes a 7608-byte remainder, for a total
; frame of 32184 bytes.
111; CHECK-LABEL: fun4:
112; CHECK:       # %bb.0: # %entry
113; CHECK-NEXT:    lgr %r0, %r15
114; CHECK-NEXT:    .cfi_def_cfa_register %r0
115; CHECK-NEXT:    aghi %r0, -24576
116; CHECK-NEXT:    .cfi_def_cfa_offset 24736
117; CHECK-NEXT:  .LBB4_1: # %entry
118; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
119; CHECK-NEXT:    aghi %r15, -8192
120; CHECK-NEXT:    cg %r0, 8184(%r15)
121; CHECK-NEXT:    clgrjh %r15, %r0, .LBB4_1
122; CHECK-NEXT:  # %bb.2: # %entry
123; CHECK-NEXT:    .cfi_def_cfa_register %r15
124; CHECK-NEXT:    aghi %r15, -7608
125; CHECK-NEXT:    .cfi_def_cfa_offset 32344
126; CHECK-NEXT:    cg %r0, 7600(%r15)
127; CHECK-NEXT:    mvhi 180(%r15), 0
128; CHECK-NEXT:    l %r0, 180(%r15)
129; CHECK-NEXT:    aghi %r15, 32184
130; CHECK-NEXT:    br %r14
131entry:
  ; %stack (32000 bytes) is never accessed; it only sizes the frame. The two
  ; bitcast results below are not used by any later instruction.
132  %stack = alloca [8000 x i32], align 4
133  %i = alloca i32, align 4
134  %0 = bitcast [8000 x i32]* %stack to i8*
135  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  ; Volatile store and load of %i: the mvhi and l at 180(%r15) above.
136  store volatile i32 0, i32* %i, align 4
137  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
138  ret void
139}
140
141; Probe size should be rounded down to a multiple of the stack alignment.
142define void @fun5() #0 "stack-probe-size"="4100" {
; Requests "stack-probe-size"="4100". The expected output nevertheless uses a
; first step of 4096 bytes (not 4100), followed by an 88-byte remainder; both
; steps are probed with cg and the total frame is 4184 bytes.
143; CHECK-LABEL: fun5:
144; CHECK:       # %bb.0: # %entry
145; CHECK-NEXT:    aghi %r15, -4096
146; CHECK-NEXT:    .cfi_def_cfa_offset 4256
147; CHECK-NEXT:    cg %r0, 4088(%r15)
148; CHECK-NEXT:    aghi %r15, -88
149; CHECK-NEXT:    .cfi_def_cfa_offset 4344
150; CHECK-NEXT:    cg %r0, 80(%r15)
151; CHECK-NEXT:    mvhi 180(%r15), 0
152; CHECK-NEXT:    l %r0, 180(%r15)
153; CHECK-NEXT:    aghi %r15, 4184
154; CHECK-NEXT:    br %r14
155entry:
  ; %stack (4000 bytes) is never accessed; it only sizes the frame. The two
  ; bitcast results below are not used by any later instruction.
156  %stack = alloca [1000 x i32], align 4
157  %i = alloca i32, align 4
158  %0 = bitcast [1000 x i32]* %stack to i8*
159  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  ; Volatile store and load of %i: the mvhi and l at 180(%r15) above.
160  store volatile i32 0, i32* %i, align 4
161  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
162  ret void
163}
164
165; The minimum probe size is the stack alignment.
166define void @fun6() #0 "stack-probe-size"="5" {
; Requests "stack-probe-size"="5". The expected output uses a loop step of 8
; bytes instead, probing at 0(%r15) on every iteration until the whole
; 4184-byte frame (the loop bound in %r0) has been allocated; there is no
; remainder step after the loop.
167; CHECK-LABEL: fun6:
168; CHECK:       # %bb.0: # %entry
169; CHECK-NEXT:    lgr %r0, %r15
170; CHECK-NEXT:    .cfi_def_cfa_register %r0
171; CHECK-NEXT:    aghi %r0, -4184
172; CHECK-NEXT:    .cfi_def_cfa_offset 4344
173; CHECK-NEXT:  .LBB6_1: # %entry
174; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
175; CHECK-NEXT:    aghi %r15, -8
176; CHECK-NEXT:    cg %r0, 0(%r15)
177; CHECK-NEXT:    clgrjh %r15, %r0, .LBB6_1
178; CHECK-NEXT:  # %bb.2: # %entry
179; CHECK-NEXT:    .cfi_def_cfa_register %r15
180; CHECK-NEXT:    mvhi 180(%r15), 0
181; CHECK-NEXT:    l %r0, 180(%r15)
182; CHECK-NEXT:    aghi %r15, 4184
183; CHECK-NEXT:    br %r14
184entry:
  ; %stack (4000 bytes) is never accessed; it only sizes the frame. The two
  ; bitcast results below are not used by any later instruction.
185  %stack = alloca [1000 x i32], align 4
186  %i = alloca i32, align 4
187  %0 = bitcast [1000 x i32]* %stack to i8*
188  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  ; Volatile store and load of %i: the mvhi and l at 180(%r15) above.
189  store volatile i32 0, i32* %i, align 4
190  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
191  ret void
192}
193
194; Small with a natural probe (STMG) - needs no extra probe.
195define i32 @fun7() #0 {
; Calls @foo and allocates 950 x i32 (3800 bytes). Because of the call, the
; expected prologue saves %r14/%r15 with stmg before the single 3976-byte
; stack adjustment, and no cg probe follows that adjustment.
196; CHECK-LABEL: fun7:
197; CHECK:       # %bb.0:
198; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
199; CHECK-NEXT:    .cfi_offset %r14, -48
200; CHECK-NEXT:    .cfi_offset %r15, -40
201; CHECK-NEXT:    aghi %r15, -3976
202; CHECK-NEXT:    .cfi_def_cfa_offset 4136
203; CHECK-NEXT:    brasl %r14, foo@PLT
204; CHECK-NEXT:    st %r2, 568(%r15)
205; CHECK-NEXT:    l %r2, 176(%r15)
206; CHECK-NEXT:    lmg %r14, %r15, 4088(%r15)
207; CHECK-NEXT:    br %r14
  ; The call result is stored (volatile) to element 98 of the buffer, then
  ; element 0 is loaded (volatile) and returned.
208  %v = call i32 @foo()
209  %a = alloca i32, i64 950
210  %b = getelementptr inbounds i32, i32* %a, i64 98
211  store volatile i32 %v, i32* %b
212  %c = load volatile i32, i32* %a
213  ret i32 %c
214}
215
216; Medium with an STMG - still needs probing.
217define i32 @fun8() #0 {
; Same shape as the stmg case with 950 elements, but with 952 x i32
; (3808 bytes): the frame grows to 3984 bytes and the expected output now
; contains an explicit cg probe right after the stack adjustment, in addition
; to the stmg register save.
218; CHECK-LABEL: fun8:
219; CHECK:       # %bb.0:
220; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
221; CHECK-NEXT:    .cfi_offset %r14, -48
222; CHECK-NEXT:    .cfi_offset %r15, -40
223; CHECK-NEXT:    aghi %r15, -3984
224; CHECK-NEXT:    .cfi_def_cfa_offset 4144
225; CHECK-NEXT:    cg %r0, 3976(%r15)
226; CHECK-NEXT:    brasl %r14, foo@PLT
227; CHECK-NEXT:    st %r2, 976(%r15)
228; CHECK-NEXT:    l %r2, 176(%r15)
229; CHECK-NEXT:    lmg %r14, %r15, 4096(%r15)
230; CHECK-NEXT:    br %r14
231
  ; The call result is stored (volatile) to element 200 of the buffer, then
  ; element 0 is loaded (volatile) and returned.
232  %v = call i32 @foo()
233  %a = alloca i32, i64 952
234  %b = getelementptr inbounds i32, i32* %a, i64 200
235  store volatile i32 %v, i32* %b
236  %c = load volatile i32, i32* %a
237  ret i32 %c
238}
239
240define void @fun9() #0 "backchain" {
; Looped probing combined with the "backchain" attribute. The allocas match
; the [7122 x i32] case that ends evenly on the step, and so does the probing
; loop; the expected output additionally copies the incoming %r15 into %r1
; before the loop and stores %r1 at 0(%r15) once the loop has finished
; (presumably the back chain slot of the new frame - confirm against the ABI).
241; CHECK-LABEL: fun9:
242; CHECK:       # %bb.0: # %entry
243; CHECK-NEXT:    lgr %r1, %r15
244; CHECK-NEXT:    lgr %r0, %r15
245; CHECK-NEXT:    .cfi_def_cfa_register %r0
246; CHECK-NEXT:    aghi %r0, -28672
247; CHECK-NEXT:    .cfi_def_cfa_offset 28832
248; CHECK-NEXT:  .LBB9_1: # %entry
249; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
250; CHECK-NEXT:    aghi %r15, -4096
251; CHECK-NEXT:    cg %r0, 4088(%r15)
252; CHECK-NEXT:    clgrjh %r15, %r0, .LBB9_1
253; CHECK-NEXT:  # %bb.2: # %entry
254; CHECK-NEXT:    .cfi_def_cfa_register %r15
255; CHECK-NEXT:    stg %r1, 0(%r15)
256; CHECK-NEXT:    mvhi 180(%r15), 0
257; CHECK-NEXT:    l %r0, 180(%r15)
258; CHECK-NEXT:    aghi %r15, 28672
259; CHECK-NEXT:    br %r14
260entry:
  ; %stack is never accessed; it only sizes the frame. The two bitcast results
  ; below are not used by any later instruction.
261  %stack = alloca [7122 x i32], align 4
262  %i = alloca i32, align 4
263  %0 = bitcast [7122 x i32]* %stack to i8*
264  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  ; Volatile store and load of %i: the mvhi and l at 180(%r15) above.
265  store volatile i32 0, i32* %i, align 4
266  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
267  ret void
268}
269
270
; External function with no definition in this file; it is called by the two
; stmg tests so that their prologues have to save the return address register.
271declare i32 @foo()
; Attribute group applied to every function in this file: requests inline
; stack probing via "probe-stack"="inline-asm".
272attributes #0 = {  "probe-stack"="inline-asm"  }
273
274