; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-load-store-renaming=true < %s -mtriple=arm64-apple-ios7.0.0 -mcpu=cyclone -enable-misched=false | FileCheck %s

; rdar://13625505
; Here we have 9 fixed integer arguments; the 9th argument is passed on the
; stack, and the varargs start right after it at 8-byte alignment.
define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
; CHECK-LABEL: fn9:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #64 ; =64
; CHECK-NEXT:    ldr w8, [sp, #64]
; CHECK-NEXT:    stp w2, w1, [sp, #52]
; CHECK-NEXT:    stp w4, w3, [sp, #44]
; CHECK-NEXT:    stp w6, w5, [sp, #36]
; CHECK-NEXT:    str w7, [sp, #32]
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ldr w9, [sp, #72]
; CHECK-NEXT:    ldr w8, [sp, #80]
; CHECK-NEXT:    stp w8, w9, [sp, #16]
; CHECK-NEXT:    add x8, sp, #72 ; =72
; CHECK-NEXT:    add x8, x8, #24 ; =24
; CHECK-NEXT:    str x8, [sp, #24]
; CHECK-NEXT:    ldr w8, [sp, #88]
; CHECK-NEXT:    str w8, [sp, #12]
; CHECK-NEXT:    add sp, sp, #64 ; =64
; CHECK-NEXT:    ret
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca i32, align 4
  %6 = alloca i32, align 4
  %7 = alloca i32, align 4
  %8 = alloca i32, align 4
  %9 = alloca i32, align 4
  %args = alloca i8*, align 8
  %a10 = alloca i32, align 4
  %a11 = alloca i32, align 4
  %a12 = alloca i32, align 4
  store i32 %a2, i32* %2, align 4
  store i32 %a3, i32* %3, align 4
  store i32 %a4, i32* %4, align 4
  store i32 %a5, i32* %5, align 4
  store i32 %a6, i32* %6, align 4
  store i32 %a7, i32* %7, align 4
  store i32 %a8, i32* %8, align 4
  store i32 %a9, i32* %9, align 4
  store i32 %a9, i32* %a1
  %10 = bitcast i8** %args to i8*
  call void @llvm.va_start(i8* %10)
  %11 = va_arg i8** %args, i32
  store i32 %11, i32* %a10, align 4
  %12 = va_arg i8** %args, i32
  store i32 %12, i32* %a11, align 4
  %13 = va_arg i8** %args, i32
  store i32 %13, i32* %a12, align 4
  ret void
}

declare void @llvm.va_start(i8*) nounwind

; Caller side for @fn9: passes 9 fixed integer arguments plus three i32
; varargs (10, 11, 12), so the 9th fixed argument and the varargs all go on
; the stack.
define i32 @main() nounwind ssp {
; CHECK-LABEL: main:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #96 ; =96
; CHECK-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
; CHECK-NEXT:    mov w9, #1
; CHECK-NEXT:    mov w8, #2
; CHECK-NEXT:    stp w8, w9, [sp, #72]
; CHECK-NEXT:    mov w9, #3
; CHECK-NEXT:    mov w8, #4
; CHECK-NEXT:    stp w8, w9, [sp, #64]
; CHECK-NEXT:    mov w9, #5
; CHECK-NEXT:    mov w8, #6
; CHECK-NEXT:    stp w8, w9, [sp, #56]
; CHECK-NEXT:    mov w9, #7
; CHECK-NEXT:    mov w8, #8
; CHECK-NEXT:    stp w8, w9, [sp, #48]
; CHECK-NEXT:    mov w8, #9
; CHECK-NEXT:    mov w9, #10
; CHECK-NEXT:    stp w9, w8, [sp, #40]
; CHECK-NEXT:    mov w10, #11
; CHECK-NEXT:    mov w11, #12
; CHECK-NEXT:    stp w11, w10, [sp, #32]
; CHECK-NEXT:    stp x10, x11, [sp, #16]
; CHECK-NEXT:    str x9, [sp, #8]
; CHECK-NEXT:    str w8, [sp]
; CHECK-NEXT:    add x0, sp, #76 ; =76
; CHECK-NEXT:    mov w1, #2
; CHECK-NEXT:    mov w2, #3
; CHECK-NEXT:    mov w3, #4
; CHECK-NEXT:    mov w4, #5
; CHECK-NEXT:    mov w5, #6
; CHECK-NEXT:    mov w6, #7
; CHECK-NEXT:    mov w7, #8
; CHECK-NEXT:    bl _fn9
; CHECK-NEXT:    mov w0, #0
; CHECK-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96 ; =96
; CHECK-NEXT:    ret
  %a1 = alloca i32, align 4
  %a2 = alloca i32, align 4
  %a3 = alloca i32, align 4
  %a4 = alloca i32, align 4
  %a5 = alloca i32, align 4
  %a6 = alloca i32, align 4
  %a7 = alloca i32, align 4
  %a8 = alloca i32, align 4
  %a9 = alloca i32, align 4
  %a10 = alloca i32, align 4
  %a11 = alloca i32, align 4
  %a12 = alloca i32, align 4
  store i32 1, i32* %a1, align 4
  store i32 2, i32* %a2, align 4
  store i32 3, i32* %a3, align 4
  store i32 4, i32* %a4, align 4
  store i32 5, i32* %a5, align 4
  store i32 6, i32* %a6, align 4
  store i32 7, i32* %a7, align 4
  store i32 8, i32* %a8, align 4
  store i32 9, i32* %a9, align 4
  store i32 10, i32* %a10, align 4
  store i32 11, i32* %a11, align 4
  store i32 12, i32* %a12, align 4
  %1 = load i32, i32* %a1, align 4
  %2 = load i32, i32* %a2, align 4
  %3 = load i32, i32* %a3, align 4
  %4 = load i32, i32* %a4, align 4
  %5 = load i32, i32* %a5, align 4
  %6 = load i32, i32* %a6, align 4
  %7 = load i32, i32* %a7, align 4
  %8 = load i32, i32* %a8, align 4
  %9 = load i32, i32* %a9, align 4
  %10 = load i32, i32* %a10, align 4
  %11 = load i32, i32* %a11, align 4
  %12 = load i32, i32* %a12, align 4
  call void (i32*, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32* %a1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
  ret i32 0
}

; rdar://13668483
@.str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1
; Variadic callee that pulls an i32 and then a 16-byte <4 x i32> vector off
; the va_list; the vector slot is rounded up to 16-byte alignment.
define void @foo(i8* %fmt, ...) nounwind {
; CHECK-LABEL: foo:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #48 ; =48
; CHECK-NEXT:    ldr w8, [sp, #48]
; CHECK-NEXT:    str w8, [sp, #28]
; CHECK-NEXT:    add x8, sp, #48 ; =48
; CHECK-NEXT:    add x8, x8, #23 ; =23
; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff0
; CHECK-NEXT:    add x9, x8, #16 ; =16
; CHECK-NEXT:    stp x9, x0, [sp, #32]
; CHECK-NEXT:    ldr q0, [x8]
; CHECK-NEXT:    str q0, [sp], #48
; CHECK-NEXT:    ret
entry:
  %fmt.addr = alloca i8*, align 8
  %args = alloca i8*, align 8
  %vc = alloca i32, align 4
  %vv = alloca <4 x i32>, align 16
  store i8* %fmt, i8** %fmt.addr, align 8
  %args1 = bitcast i8** %args to i8*
  call void @llvm.va_start(i8* %args1)
  %0 = va_arg i8** %args, i32
  store i32 %0, i32* %vc, align 4
  %1 = va_arg i8** %args, <4 x i32>
  store <4 x i32> %1, <4 x i32>* %vv, align 16
  ret void
}

; Caller side for @foo: passes an i32 and a <4 x i32> as varargs, checking
; the outgoing vector vararg is stored 16-byte aligned on the stack.
define void @bar(i32 %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: bar:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #80 ; =80
; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    str w0, [sp, #60]
; CHECK-NEXT:    stp q0, q0, [sp, #16]
; CHECK-NEXT:    str x0, [sp]
; CHECK-NEXT:    Lloh0:
; CHECK-NEXT:    adrp x0, l_.str@PAGE
; CHECK-NEXT:    Lloh1:
; CHECK-NEXT:    add x0, x0, l_.str@PAGEOFF
; CHECK-NEXT:    bl _foo
; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #80 ; =80
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpAdd Lloh0, Lloh1
entry:
  %x.addr = alloca i32, align 4
  %y.addr = alloca <4 x i32>, align 16
  store i32 %x, i32* %x.addr, align 4
  store <4 x i32> %y, <4 x i32>* %y.addr, align 16
  %0 = load i32, i32* %x.addr, align 4
  %1 = load <4 x i32>, <4 x i32>* %y.addr, align 16
  call void (i8*, ...) @foo(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
  ret void
}

; rdar://13668927
; When passing 16-byte aligned small structs as vararg, make sure the caller
; side is 16-byte aligned on stack.
%struct.s41 = type { i32, i16, i32, i16 }
; Variadic callee that reads an i32 vararg followed by a 16-byte-aligned
; %struct.s41 vararg (fetched via a hand-expanded, aligned va_arg sequence).
define void @foo2(i8* %fmt, ...) nounwind {
; CHECK-LABEL: foo2:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #48 ; =48
; CHECK-NEXT:    ldr w8, [sp, #48]
; CHECK-NEXT:    str w8, [sp, #28]
; CHECK-NEXT:    add x8, sp, #48 ; =48
; CHECK-NEXT:    add x8, x8, #23 ; =23
; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff0
; CHECK-NEXT:    add x9, x8, #16 ; =16
; CHECK-NEXT:    stp x9, x0, [sp, #32]
; CHECK-NEXT:    ldr q0, [x8]
; CHECK-NEXT:    str q0, [sp], #48
; CHECK-NEXT:    ret
entry:
  %fmt.addr = alloca i8*, align 8
  %args = alloca i8*, align 8
  %vc = alloca i32, align 4
  %vs = alloca %struct.s41, align 16
  store i8* %fmt, i8** %fmt.addr, align 8
  %args1 = bitcast i8** %args to i8*
  call void @llvm.va_start(i8* %args1)
  %0 = va_arg i8** %args, i32
  store i32 %0, i32* %vc, align 4
  ; Manually expanded va_arg for the 16-byte-aligned struct: round the
  ; current arg pointer up to 16 bytes, then advance past the 16-byte slot.
  %ap.cur = load i8*, i8** %args
  %1 = getelementptr i8, i8* %ap.cur, i32 15
  %2 = ptrtoint i8* %1 to i64
  %3 = and i64 %2, -16
  %ap.align = inttoptr i64 %3 to i8*
  %ap.next = getelementptr i8, i8* %ap.align, i32 16
  store i8* %ap.next, i8** %args
  %4 = bitcast i8* %ap.align to %struct.s41*
  %5 = bitcast %struct.s41* %vs to i8*
  %6 = bitcast %struct.s41* %4 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 %6, i64 16, i1 false)
  ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind

; Caller side for @foo2: the s41 struct arrives coerced to i128 and is
; forwarded as a vararg, checking the outgoing copy is 16-byte aligned.
define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
; CHECK-LABEL: bar2:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #80 ; =80
; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    str w0, [sp, #60]
; CHECK-NEXT:    stp x1, x2, [sp, #32]
; CHECK-NEXT:    stp x1, x2, [sp, #16]
; CHECK-NEXT:    str x0, [sp]
; CHECK-NEXT:    Lloh2:
; CHECK-NEXT:    adrp x0, l_.str@PAGE
; CHECK-NEXT:    Lloh3:
; CHECK-NEXT:    add x0, x0, l_.str@PAGEOFF
; CHECK-NEXT:    bl _foo2
; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #80 ; =80
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpAdd Lloh2, Lloh3
entry:
  %x.addr = alloca i32, align 4
  %s41 = alloca %struct.s41, align 16
  store i32 %x, i32* %x.addr, align 4
  %0 = bitcast %struct.s41* %s41 to i128*
  store i128 %s41.coerce, i128* %0, align 1
  %1 = load i32, i32* %x.addr, align 4
  %2 = bitcast %struct.s41* %s41 to i128*
  %3 = load i128, i128* %2, align 1
  call void (i8*, ...) @foo2(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
  ret void
}