; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s
;
; Test stack clash protection probing for static allocas.
;
; Every function below carries attribute group #0 ("probe-stack"="inline-asm",
; defined at the end of this file). The expected assembly is autogenerated:
; regenerate it with the script named above instead of editing it by hand.

; Small: one probe.
; The 560-byte frame is allocated in a single step and touched once, by the
; compare at 552(%r15).
define i32 @fun0() #0 {
; CHECK-LABEL: fun0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r15, -560
; CHECK-NEXT:    .cfi_def_cfa_offset 720
; CHECK-NEXT:    cg %r0, 552(%r15)
; CHECK-NEXT:    mvhi 552(%r15), 1
; CHECK-NEXT:    l %r2, 160(%r15)
; CHECK-NEXT:    aghi %r15, 560
; CHECK-NEXT:    br %r14

  %a = alloca i32, i64 100
  %b = getelementptr inbounds i32, i32* %a, i64 98
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Medium: two probes.
; The 8176-byte frame is allocated as 4096 + 4080 bytes, with a compare after
; each of the two stack pointer adjustments.
define i32 @fun1() #0 {
; CHECK-LABEL: fun1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4256
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    aghi %r15, -4080
; CHECK-NEXT:    .cfi_def_cfa_offset 8336
; CHECK-NEXT:    cg %r0, 4072(%r15)
; CHECK-NEXT:    mvhi 976(%r15), 1
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    aghi %r15, 8176
; CHECK-NEXT:    br %r14

  %a = alloca i32, i64 2000
  %b = getelementptr inbounds i32, i32* %a, i64 200
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Large: Use a loop to allocate and probe in steps.
; The 72176-byte frame is covered by a loop over 69632 bytes (17 steps of
; 4096, with %r0 holding the loop's end address) followed by one 2544-byte
; remainder that gets its own compare.
define i32 @fun2() #0 {
; CHECK-LABEL: fun2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    agfi %r0, -69632
; CHECK-NEXT:    .cfi_def_cfa_offset 69792
; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB2_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    aghi %r15, -2544
; CHECK-NEXT:    .cfi_def_cfa_offset 72336
; CHECK-NEXT:    cg %r0, 2536(%r15)
; CHECK-NEXT:    lhi %r0, 1
; CHECK-NEXT:    mvhi 568(%r15), 1
; CHECK-NEXT:    sty %r0, 28968(%r15)
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    agfi %r15, 72176
; CHECK-NEXT:    br %r14

  %a = alloca i32, i64 18000
  %b0 = getelementptr inbounds i32, i32* %a, i64 98
  %b1 = getelementptr inbounds i32, i32* %a, i64 7198
  store volatile i32 1, i32* %b0
  store volatile i32 1, i32* %b1
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Ends evenly on the step so no remainder needed.
; The 28672-byte frame is exactly seven 4096-byte steps, so the loop is the
; only allocation and no trailing adjustment or compare follows it.
define void @fun3() #0 {
; CHECK-LABEL: fun3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -28672
; CHECK-NEXT:    .cfi_def_cfa_offset 28832
; CHECK-NEXT:  .LBB3_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB3_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 28672
; CHECK-NEXT:    br %r14
entry:
  %stack = alloca [7122 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [7122 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Loop with bigger step.
; "stack-probe-size"="8192" makes the loop step 8192 bytes: 24576 bytes are
; covered by the loop and the remaining 7608 bytes by one extra probe.
define void @fun4() #0 "stack-probe-size"="8192" {
; CHECK-LABEL: fun4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -24576
; CHECK-NEXT:    .cfi_def_cfa_offset 24736
; CHECK-NEXT:  .LBB4_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -8192
; CHECK-NEXT:    cg %r0, 8184(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB4_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    aghi %r15, -7608
; CHECK-NEXT:    .cfi_def_cfa_offset 32344
; CHECK-NEXT:    cg %r0, 7600(%r15)
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 32184
; CHECK-NEXT:    br %r14
entry:
  %stack = alloca [8000 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [8000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Probe size is rounded down to a multiple of the stack alignment: the
; requested 4100 is used as 4096, leaving an 88-byte remainder of the
; 4184-byte frame.
define void @fun5() #0 "stack-probe-size"="4100" {
; CHECK-LABEL: fun5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4256
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    aghi %r15, -88
; CHECK-NEXT:    .cfi_def_cfa_offset 4344
; CHECK-NEXT:    cg %r0, 80(%r15)
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 4184
; CHECK-NEXT:    br %r14
entry:
  %stack = alloca [1000 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [1000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; The minimum probe size is the stack alignment.
; The requested size of 5 is raised to 8, so the whole 4184-byte frame is
; allocated by a loop that moves the stack pointer 8 bytes per iteration.
define void @fun6() #0 "stack-probe-size"="5" {
; CHECK-LABEL: fun6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -4184
; CHECK-NEXT:    .cfi_def_cfa_offset 4344
; CHECK-NEXT:  .LBB6_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -8
; CHECK-NEXT:    cg %r0, 0(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB6_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 4184
; CHECK-NEXT:    br %r14
entry:
  %stack = alloca [1000 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [1000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Small with a natural probe (STMG) - needs no extra probe.
; The 3976-byte frame is allocated right after the STMG register save and no
; compare probe is emitted.
define i32 @fun7() #0 {
; CHECK-LABEL: fun7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -3976
; CHECK-NEXT:    .cfi_def_cfa_offset 4136
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    st %r2, 568(%r15)
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    lmg %r14, %r15, 4088(%r15)
; CHECK-NEXT:    br %r14

  %v = call i32 @foo()
  %a = alloca i32, i64 950
  %b = getelementptr inbounds i32, i32* %a, i64 98
  store volatile i32 %v, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Medium with an STMG - still needs probing.
; With a 3984-byte frame (8 bytes more than fun7) a compare probe at
; 3976(%r15) follows the allocation.
define i32 @fun8() #0 {
; CHECK-LABEL: fun8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -3984
; CHECK-NEXT:    .cfi_def_cfa_offset 4144
; CHECK-NEXT:    cg %r0, 3976(%r15)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    st %r2, 976(%r15)
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    lmg %r14, %r15, 4096(%r15)
; CHECK-NEXT:    br %r14

  %v = call i32 @foo()
  %a = alloca i32, i64 952
  %b = getelementptr inbounds i32, i32* %a, i64 200
  store volatile i32 %v, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Loop with backchain: the incoming stack pointer is copied to %r1 before the
; probing loop and stored at 0(%r15) once the frame is fully allocated.
define void @fun9() #0 "backchain" {
; CHECK-LABEL: fun9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r1, %r15
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -28672
; CHECK-NEXT:    .cfi_def_cfa_offset 28832
; CHECK-NEXT:  .LBB9_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB9_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    stg %r1, 0(%r15)
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 28672
; CHECK-NEXT:    br %r14
entry:
  %stack = alloca [7122 x i32], align 4
  %i = alloca i32, align 4
  %0 = bitcast [7122 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}


; External callee used by fun7 and fun8.
declare i32 @foo()
; Attribute group shared by every test function in this file.
attributes #0 = { "probe-stack"="inline-asm" }