1; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s
2
; Loads an i128 and returns it. The expected output below has the load done as
; one ldp into x8/x1, both loaded registers and-ed with the x16 mask (derived
; from sp via cmp/csetm), and then one csdb after the pair of masking
; instructions, rather than one csdb per masked register.
3define i128 @ldp_single_csdb(i128* %p) speculative_load_hardening {
4entry:
5  %0 = load i128, i128* %p, align 16
6  ret i128 %0
7; CHECK-LABEL: ldp_single_csdb
8; CHECK:      ldp   x8, x1, [x0]
9; CHECK-NEXT: cmp sp, #0
10; CHECK-NEXT: csetm x16, ne
11; CHECK-NEXT: and   x8, x8, x16
12; CHECK-NEXT: and   x1, x1, x16
13; CHECK-NEXT: csdb
14; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
15; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
16; CHECK-NEXT: mov x0, x8
17; CHECK-NEXT: mov sp, [[TMPREG]]
18; CHECK-NEXT: ret
19}
20
; Loads a double through %p and returns it. The loaded value lands in d0, and
; the expected output masks the address register (x0) with x16 before the ldr
; instead of masking the loaded value afterwards.
21define double @ld_double(double* %p) speculative_load_hardening {
22entry:
23  %0 = load double, double* %p, align 8
24  ret double %0
25; Checking that the address loaded from is masked for a floating point load.
26; CHECK-LABEL: ld_double
27; CHECK:      cmp sp, #0
28; CHECK-NEXT: csetm x16, ne
29; CHECK-NEXT: and   x0, x0, x16
30; CHECK-NEXT: csdb
31; CHECK-NEXT: ldr   d0, [x0]
32; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
33; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
34; CHECK-NEXT: mov sp, [[TMPREG]]
35; CHECK-NEXT: ret
36}
37
; Loads an i64 and first uses it truncated to i32 (in the add), then uses the
; full 64-bit value (in the compare against zero). In the expected output the
; load targets x8 and the first use reads w8, a sub-register of x8.
38define i32 @csdb_emitted_for_subreg_use(i64* %p, i32 %b) speculative_load_hardening {
39entry:
40  %X = load i64, i64* %p, align 8
41  %X_trunc = trunc i64 %X to i32
42  %add = add i32 %b, %X_trunc
43  %iszero = icmp eq i64 %X, 0
44  %ret = select i1 %iszero, i32 %b, i32 %add
45  ret i32 %ret
46; Checking that the csdb is emitted before the first use of the masked loaded
; value, even when that use reads only a sub-register (w8) of the loaded x8.
47; CHECK-LABEL: csdb_emitted_for_subreg_use
48; CHECK:      ldr x8, [x0]
49; CHECK-NEXT: cmp sp, #0
50; CHECK-NEXT: csetm x16, ne
51; CHECK-NEXT: and x8, x8, x16
52; csdb instruction must occur before the add instruction with w8 as operand.
53; CHECK-NEXT: csdb
54; CHECK-NEXT: add w9, w1, w8
55; CHECK-NEXT: cmp x8, #0
56; CHECK-NEXT: csel w0, w1, w9, eq
57; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
58; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
59; CHECK-NEXT: mov sp, [[TMPREG]]
60; CHECK-NEXT: ret
61}
62
; Loads an i32 and first uses it zero-extended to i64 (in the add), then uses
; the 32-bit value (in the compare against zero). In the expected output the
; load targets w8 and the first use reads x8, the super-register of w8.
63define i64 @csdb_emitted_for_superreg_use(i32* %p, i64 %b) speculative_load_hardening {
64entry:
65  %X = load i32, i32* %p, align 4
66  %X_ext = zext i32 %X to i64
67  %add = add i64 %b, %X_ext
68  %iszero = icmp eq i32 %X, 0
69  %ret = select i1 %iszero, i64 %b, i64 %add
70  ret i64 %ret
71; Checking that the csdb is emitted before the first use of the masked loaded
; value, even when that use reads the super-register (x8) of the loaded w8.
72; CHECK-LABEL: csdb_emitted_for_superreg_use
73; CHECK:      ldr w8, [x0]
74; CHECK-NEXT: cmp sp, #0
75; CHECK-NEXT: csetm x16, ne
76; CHECK-NEXT: and w8, w8, w16
77; csdb instruction must occur before the add instruction with x8 as operand.
78; CHECK-NEXT: csdb
79; CHECK-NEXT: add x9, x1, x8
80; CHECK-NEXT: cmp w8, #0
81; CHECK-NEXT: csel x0, x1, x9, eq
82; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
83; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
84; CHECK-NEXT: mov sp, [[TMPREG]]
85; CHECK-NEXT: ret
86}
87
; The inline asm below takes an input in x16 and produces its output in x17
; (with the second input tied to the output). The expected output contains a
; dsb sy / isb barrier pair and, from there up to the ret, neither a csdb nor
; any and-masking instruction.
; NOTE(review): presumably the asm's explicit use of x16 (the register the
; other tests in this file use as the mask) is what makes the pass fall back to
; full barriers -- confirm against the AArch64 speculation hardening pass.
88define i64 @no_masking_with_full_control_flow_barriers(i64 %a, i64 %b, i64* %p) speculative_load_hardening {
89; CHECK-LABEL: no_masking_with_full_control_flow_barriers
90; CHECK: dsb sy
91; CHECK: isb
92entry:
93  %0 = tail call i64 asm "hint #12", "={x17},{x16},0"(i64 %b, i64 %a)
94  %X = load i64, i64* %p, align 8
95  %ret = add i64 %X, %0
96; CHECK-NOT: csdb
97; CHECK-NOT: and
98; CHECK: ret
99  ret i64 %ret
100}
101
; Loads a <2 x i32>, widens it to <4 x i32> by repeating both elements, and
; stores the result. In the expected output the 64-bit load goes to d0 and is
; then used as part of the wider v0/q0; the address register (x1) is masked
; with x16 before the load rather than the loaded vector register being masked.
; NOTE(review): the function name suggests this covers a load whose
; destination is implicitly defined as a wider register -- confirm.
102define void @f_implicitdef_vector_load(<4 x i32>* %dst, <2 x i32>* %src) speculative_load_hardening
103{
104entry:
105  %0 = load <2 x i32>, <2 x i32>* %src, align 8
106  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
107  store <4 x i32> %shuffle, <4 x i32>* %dst, align 4
108  ret void
109; CHECK-LABEL: f_implicitdef_vector_load
110; CHECK:       cmp     sp, #0
111; CHECK-NEXT:  csetm   x16, ne
112; CHECK-NEXT:  and     x1, x1, x16
113; CHECK-NEXT:  csdb
114; CHECK-NEXT:  ldr     d0, [x1]
115; CHECK-NEXT:  mov     v0.d[1], v0.d[0]
116; CHECK-NEXT:  str     q0, [x0]
117; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
118; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
119; CHECK-NEXT:  mov     sp, [[TMPREG]]
120; CHECK-NEXT:  ret
121}
122
; Loads a double from %b and inserts it into lane 0 of a vector whose lane 1 is
; zero. In the expected output this becomes a single-lane ld1 into v0, which
; was previously set up by the movi, so the load instruction both reads and
; writes v0. The address register (x1) is masked with x16 before the load.
; %a is unused.
123define <2 x double> @f_usedefvectorload(double* %a, double* %b) speculative_load_hardening {
124entry:
125; CHECK-LABEL: f_usedefvectorload
126; CHECK:       cmp     sp, #0
127; CHECK-NEXT:  csetm   x16, ne
128; CHECK-NEXT:  movi    v0.2d, #0000000000000000
129; CHECK-NEXT:  and     x1, x1, x16
130; CHECK-NEXT:  csdb
131; CHECK-NEXT:  ld1     { v0.d }[0], [x1]
132; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
133; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
134; CHECK-NEXT:  mov     sp, [[TMPREG]]
135; CHECK-NEXT:  ret
136  %0 = load double, double* %b, align 16
137  %vld1_lane = insertelement <2 x double> <double undef, double 0.000000e+00>, double %0, i32 0
138  ret <2 x double> %vld1_lane
139}
140
; Performs a volatile load from a stack slot whose result is never used (the
; function returns undef). The expected output still contains the ldr from
; [sp, #12], but with no and-masking of the loaded register or address and no
; csdb anywhere between function entry and the ret.
141define i32 @deadload() speculative_load_hardening {
142entry:
143; CHECK-LABEL: deadload
144; CHECK:       cmp     sp, #0
145; CHECK-NEXT:  csetm   x16, ne
146; CHECK-NEXT:  sub     sp, sp, #16
147; CHECK-NEXT:  .cfi_def_cfa_offset 16
148; CHECK-NEXT:  ldr     w8, [sp, #12]
149; CHECK-NEXT:  add     sp, sp, #16
150; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
151; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
152; CHECK-NEXT:  mov     sp, [[TMPREG]]
153; CHECK-NEXT:  ret
154  %a = alloca i32, align 4
155  %val = load volatile i32, i32* %a, align 4
156  ret i32 undef
157}
158