1*06f32e7eSjoerg //===- AArch64SpeculationHardening.cpp - Harden Against Missspeculation  --===//
2*06f32e7eSjoerg //
3*06f32e7eSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06f32e7eSjoerg // See https://llvm.org/LICENSE.txt for license information.
5*06f32e7eSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06f32e7eSjoerg //
7*06f32e7eSjoerg //===----------------------------------------------------------------------===//
8*06f32e7eSjoerg //
9*06f32e7eSjoerg // This file contains a pass to insert code to mitigate against side channel
10*06f32e7eSjoerg // vulnerabilities that may happen under control flow miss-speculation.
11*06f32e7eSjoerg //
12*06f32e7eSjoerg // The pass implements tracking of control flow miss-speculation into a "taint"
13*06f32e7eSjoerg // register. That taint register can then be used to mask off registers with
14*06f32e7eSjoerg // sensitive data when executing under miss-speculation, a.k.a. "transient
15*06f32e7eSjoerg // execution".
16*06f32e7eSjoerg // This pass is aimed at mitigating against SpectreV1-style vulnarabilities.
17*06f32e7eSjoerg //
18*06f32e7eSjoerg // It also implements speculative load hardening, i.e. using the taint register
19*06f32e7eSjoerg // to automatically mask off loaded data.
20*06f32e7eSjoerg //
21*06f32e7eSjoerg // As a possible follow-on improvement, also an intrinsics-based approach as
22*06f32e7eSjoerg // explained at https://lwn.net/Articles/759423/ could be implemented on top of
23*06f32e7eSjoerg // the current design.
24*06f32e7eSjoerg //
25*06f32e7eSjoerg // For AArch64, the following implementation choices are made to implement the
26*06f32e7eSjoerg // tracking of control flow miss-speculation into a taint register:
27*06f32e7eSjoerg // Some of these are different than the implementation choices made in
28*06f32e7eSjoerg // the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
29*06f32e7eSjoerg // the instruction set characteristics result in different trade-offs.
30*06f32e7eSjoerg // - The speculation hardening is done after register allocation. With a
31*06f32e7eSjoerg //   relative abundance of registers, one register is reserved (X16) to be
32*06f32e7eSjoerg //   the taint register. X16 is expected to not clash with other register
33*06f32e7eSjoerg //   reservation mechanisms with very high probability because:
34*06f32e7eSjoerg //   . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
35*06f32e7eSjoerg //   . The only way to request X16 to be used as a programmer is through
36*06f32e7eSjoerg //     inline assembly. In the rare case a function explicitly demands to
37*06f32e7eSjoerg //     use X16/W16, this pass falls back to hardening against speculation
38*06f32e7eSjoerg //     by inserting a DSB SYS/ISB barrier pair which will prevent control
39*06f32e7eSjoerg //     flow speculation.
40*06f32e7eSjoerg // - It is easy to insert mask operations at this late stage as we have
41*06f32e7eSjoerg //   mask operations available that don't set flags.
42*06f32e7eSjoerg // - The taint variable contains all-ones when no miss-speculation is detected,
43*06f32e7eSjoerg //   and contains all-zeros when miss-speculation is detected. Therefore, when
44*06f32e7eSjoerg //   masking, an AND instruction (which only changes the register to be masked,
45*06f32e7eSjoerg //   no other side effects) can easily be inserted anywhere that's needed.
46*06f32e7eSjoerg // - The tracking of miss-speculation is done by using a data-flow conditional
47*06f32e7eSjoerg //   select instruction (CSEL) to evaluate the flags that were also used to
48*06f32e7eSjoerg //   make conditional branch direction decisions. Speculation of the CSEL
49*06f32e7eSjoerg //   instruction can be limited with a CSDB instruction - so the combination of
50*06f32e7eSjoerg //   CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
51*06f32e7eSjoerg //   aren't speculated. When conditional branch direction gets miss-speculated,
52*06f32e7eSjoerg //   the semantics of the inserted CSEL instruction is such that the taint
53*06f32e7eSjoerg //   register will contain all zero bits.
54*06f32e7eSjoerg //   One key requirement for this to work is that the conditional branch is
55*06f32e7eSjoerg //   followed by an execution of the CSEL instruction, where the CSEL
56*06f32e7eSjoerg //   instruction needs to use the same flags status as the conditional branch.
57*06f32e7eSjoerg //   This means that the conditional branches must not be implemented as one
58*06f32e7eSjoerg //   of the AArch64 conditional branches that do not use the flags as input
59*06f32e7eSjoerg //   (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
60*06f32e7eSjoerg //   selectors to not produce these instructions when speculation hardening
61*06f32e7eSjoerg //   is enabled. This pass will assert if it does encounter such an instruction.
62*06f32e7eSjoerg // - On function call boundaries, the miss-speculation state is transferred from
63*06f32e7eSjoerg //   the taint register X16 to be encoded in the SP register as value 0.
64*06f32e7eSjoerg //
65*06f32e7eSjoerg // For the aspect of automatically hardening loads, using the taint register,
66*06f32e7eSjoerg // (a.k.a. speculative load hardening, see
67*06f32e7eSjoerg //  https://llvm.org/docs/SpeculativeLoadHardening.html), the following
68*06f32e7eSjoerg // implementation choices are made for AArch64:
69*06f32e7eSjoerg //   - Many of the optimizations described at
70*06f32e7eSjoerg //     https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
71*06f32e7eSjoerg //     loads haven't been implemented yet - but for some of them there are
72*06f32e7eSjoerg //     FIXMEs in the code.
73*06f32e7eSjoerg //   - loads that load into general purpose (X or W) registers get hardened by
74*06f32e7eSjoerg //     masking the loaded data. For loads that load into other registers, the
75*06f32e7eSjoerg //     address loaded from gets hardened. It is expected that hardening the
76*06f32e7eSjoerg //     loaded data may be more efficient; but masking data in registers other
77*06f32e7eSjoerg //     than X or W is not easy and may result in being slower than just
78*06f32e7eSjoerg //     hardening the X address register loaded from.
79*06f32e7eSjoerg //   - On AArch64, CSDB instructions are inserted between the masking of the
80*06f32e7eSjoerg //     register and its first use, to ensure there's no non-control-flow
81*06f32e7eSjoerg //     speculation that might undermine the hardening mechanism.
82*06f32e7eSjoerg //
83*06f32e7eSjoerg // Future extensions/improvements could be:
84*06f32e7eSjoerg // - Implement this functionality using full speculation barriers, akin to the
85*06f32e7eSjoerg //   x86-slh-lfence option. This may be more useful for the intrinsics-based
86*06f32e7eSjoerg //   approach than for the SLH approach to masking.
87*06f32e7eSjoerg //   Note that this pass already inserts the full speculation barriers if the
88*06f32e7eSjoerg //   function for some niche reason makes use of X16/W16.
89*06f32e7eSjoerg // - no indirect branch misprediction gets protected/instrumented; but this
90*06f32e7eSjoerg //   could be done for some indirect branches, such as switch jump tables.
91*06f32e7eSjoerg //===----------------------------------------------------------------------===//
92*06f32e7eSjoerg 
93*06f32e7eSjoerg #include "AArch64InstrInfo.h"
94*06f32e7eSjoerg #include "AArch64Subtarget.h"
95*06f32e7eSjoerg #include "Utils/AArch64BaseInfo.h"
96*06f32e7eSjoerg #include "llvm/ADT/BitVector.h"
97*06f32e7eSjoerg #include "llvm/ADT/SmallVector.h"
98*06f32e7eSjoerg #include "llvm/CodeGen/MachineBasicBlock.h"
99*06f32e7eSjoerg #include "llvm/CodeGen/MachineFunction.h"
100*06f32e7eSjoerg #include "llvm/CodeGen/MachineFunctionPass.h"
101*06f32e7eSjoerg #include "llvm/CodeGen/MachineInstr.h"
102*06f32e7eSjoerg #include "llvm/CodeGen/MachineInstrBuilder.h"
103*06f32e7eSjoerg #include "llvm/CodeGen/MachineOperand.h"
104*06f32e7eSjoerg #include "llvm/CodeGen/MachineRegisterInfo.h"
105*06f32e7eSjoerg #include "llvm/CodeGen/RegisterScavenging.h"
106*06f32e7eSjoerg #include "llvm/IR/DebugLoc.h"
107*06f32e7eSjoerg #include "llvm/Pass.h"
108*06f32e7eSjoerg #include "llvm/Support/CodeGen.h"
109*06f32e7eSjoerg #include "llvm/Support/Debug.h"
110*06f32e7eSjoerg #include "llvm/Target/TargetMachine.h"
111*06f32e7eSjoerg #include <cassert>
112*06f32e7eSjoerg 
113*06f32e7eSjoerg using namespace llvm;
114*06f32e7eSjoerg 
115*06f32e7eSjoerg #define DEBUG_TYPE "aarch64-speculation-hardening"
116*06f32e7eSjoerg 
117*06f32e7eSjoerg #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
118*06f32e7eSjoerg 
119*06f32e7eSjoerg static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
120*06f32e7eSjoerg                                  cl::desc("Sanitize loads from memory."),
121*06f32e7eSjoerg                                  cl::init(true));
122*06f32e7eSjoerg 
123*06f32e7eSjoerg namespace {
124*06f32e7eSjoerg 
125*06f32e7eSjoerg class AArch64SpeculationHardening : public MachineFunctionPass {
126*06f32e7eSjoerg public:
127*06f32e7eSjoerg   const TargetInstrInfo *TII;
128*06f32e7eSjoerg   const TargetRegisterInfo *TRI;
129*06f32e7eSjoerg 
130*06f32e7eSjoerg   static char ID;
131*06f32e7eSjoerg 
AArch64SpeculationHardening()132*06f32e7eSjoerg   AArch64SpeculationHardening() : MachineFunctionPass(ID) {
133*06f32e7eSjoerg     initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
134*06f32e7eSjoerg   }
135*06f32e7eSjoerg 
136*06f32e7eSjoerg   bool runOnMachineFunction(MachineFunction &Fn) override;
137*06f32e7eSjoerg 
getPassName() const138*06f32e7eSjoerg   StringRef getPassName() const override {
139*06f32e7eSjoerg     return AARCH64_SPECULATION_HARDENING_NAME;
140*06f32e7eSjoerg   }
141*06f32e7eSjoerg 
142*06f32e7eSjoerg private:
143*06f32e7eSjoerg   unsigned MisspeculatingTaintReg;
144*06f32e7eSjoerg   unsigned MisspeculatingTaintReg32Bit;
145*06f32e7eSjoerg   bool UseControlFlowSpeculationBarrier;
146*06f32e7eSjoerg   BitVector RegsNeedingCSDBBeforeUse;
147*06f32e7eSjoerg   BitVector RegsAlreadyMasked;
148*06f32e7eSjoerg 
149*06f32e7eSjoerg   bool functionUsesHardeningRegister(MachineFunction &MF) const;
150*06f32e7eSjoerg   bool instrumentControlFlow(MachineBasicBlock &MBB,
151*06f32e7eSjoerg                              bool &UsesFullSpeculationBarrier);
152*06f32e7eSjoerg   bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
153*06f32e7eSjoerg                                MachineBasicBlock *&FBB,
154*06f32e7eSjoerg                                AArch64CC::CondCode &CondCode) const;
155*06f32e7eSjoerg   void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
156*06f32e7eSjoerg                           AArch64CC::CondCode &CondCode, DebugLoc DL) const;
157*06f32e7eSjoerg   void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
158*06f32e7eSjoerg                                      MachineBasicBlock::iterator MBBI) const;
159*06f32e7eSjoerg   void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
160*06f32e7eSjoerg                                      MachineBasicBlock::iterator MBBI,
161*06f32e7eSjoerg                                      unsigned TmpReg) const;
162*06f32e7eSjoerg   void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
163*06f32e7eSjoerg                                     MachineBasicBlock::iterator MBBI,
164*06f32e7eSjoerg                                     DebugLoc DL) const;
165*06f32e7eSjoerg 
166*06f32e7eSjoerg   bool slhLoads(MachineBasicBlock &MBB);
167*06f32e7eSjoerg   bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
168*06f32e7eSjoerg                               MachineBasicBlock::iterator MBBI,
169*06f32e7eSjoerg                               MachineInstr &MI, unsigned Reg);
170*06f32e7eSjoerg   bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
171*06f32e7eSjoerg                                         bool UsesFullSpeculationBarrier);
172*06f32e7eSjoerg   bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
173*06f32e7eSjoerg                                   MachineBasicBlock::iterator MBBI,
174*06f32e7eSjoerg                                   bool UsesFullSpeculationBarrier);
175*06f32e7eSjoerg   bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
176*06f32e7eSjoerg                   DebugLoc DL);
177*06f32e7eSjoerg };
178*06f32e7eSjoerg 
179*06f32e7eSjoerg } // end anonymous namespace
180*06f32e7eSjoerg 
181*06f32e7eSjoerg char AArch64SpeculationHardening::ID = 0;
182*06f32e7eSjoerg 
183*06f32e7eSjoerg INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
184*06f32e7eSjoerg                 AARCH64_SPECULATION_HARDENING_NAME, false, false)
185*06f32e7eSjoerg 
endsWithCondControlFlow(MachineBasicBlock & MBB,MachineBasicBlock * & TBB,MachineBasicBlock * & FBB,AArch64CC::CondCode & CondCode) const186*06f32e7eSjoerg bool AArch64SpeculationHardening::endsWithCondControlFlow(
187*06f32e7eSjoerg     MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
188*06f32e7eSjoerg     AArch64CC::CondCode &CondCode) const {
189*06f32e7eSjoerg   SmallVector<MachineOperand, 1> analyzeBranchCondCode;
190*06f32e7eSjoerg   if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
191*06f32e7eSjoerg     return false;
192*06f32e7eSjoerg 
193*06f32e7eSjoerg   // Ignore if the BB ends in an unconditional branch/fall-through.
194*06f32e7eSjoerg   if (analyzeBranchCondCode.empty())
195*06f32e7eSjoerg     return false;
196*06f32e7eSjoerg 
197*06f32e7eSjoerg   // If the BB ends with a single conditional branch, FBB will be set to
198*06f32e7eSjoerg   // nullptr (see API docs for TII->analyzeBranch). For the rest of the
199*06f32e7eSjoerg   // analysis we want the FBB block to be set always.
200*06f32e7eSjoerg   assert(TBB != nullptr);
201*06f32e7eSjoerg   if (FBB == nullptr)
202*06f32e7eSjoerg     FBB = MBB.getFallThrough();
203*06f32e7eSjoerg 
204*06f32e7eSjoerg   // If both the true and the false condition jump to the same basic block,
205*06f32e7eSjoerg   // there isn't need for any protection - whether the branch is speculated
206*06f32e7eSjoerg   // correctly or not, we end up executing the architecturally correct code.
207*06f32e7eSjoerg   if (TBB == FBB)
208*06f32e7eSjoerg     return false;
209*06f32e7eSjoerg 
210*06f32e7eSjoerg   assert(MBB.succ_size() == 2);
211*06f32e7eSjoerg   // translate analyzeBranchCondCode to CondCode.
212*06f32e7eSjoerg   assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
213*06f32e7eSjoerg   CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
214*06f32e7eSjoerg   return true;
215*06f32e7eSjoerg }
216*06f32e7eSjoerg 
insertFullSpeculationBarrier(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc DL) const217*06f32e7eSjoerg void AArch64SpeculationHardening::insertFullSpeculationBarrier(
218*06f32e7eSjoerg     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
219*06f32e7eSjoerg     DebugLoc DL) const {
220*06f32e7eSjoerg   // A full control flow speculation barrier consists of (DSB SYS + ISB)
221*06f32e7eSjoerg   BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
222*06f32e7eSjoerg   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
223*06f32e7eSjoerg }
224*06f32e7eSjoerg 
insertTrackingCode(MachineBasicBlock & SplitEdgeBB,AArch64CC::CondCode & CondCode,DebugLoc DL) const225*06f32e7eSjoerg void AArch64SpeculationHardening::insertTrackingCode(
226*06f32e7eSjoerg     MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
227*06f32e7eSjoerg     DebugLoc DL) const {
228*06f32e7eSjoerg   if (UseControlFlowSpeculationBarrier) {
229*06f32e7eSjoerg     insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
230*06f32e7eSjoerg   } else {
231*06f32e7eSjoerg     BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
232*06f32e7eSjoerg         .addDef(MisspeculatingTaintReg)
233*06f32e7eSjoerg         .addUse(MisspeculatingTaintReg)
234*06f32e7eSjoerg         .addUse(AArch64::XZR)
235*06f32e7eSjoerg         .addImm(CondCode);
236*06f32e7eSjoerg     SplitEdgeBB.addLiveIn(AArch64::NZCV);
237*06f32e7eSjoerg   }
238*06f32e7eSjoerg }
239*06f32e7eSjoerg 
instrumentControlFlow(MachineBasicBlock & MBB,bool & UsesFullSpeculationBarrier)240*06f32e7eSjoerg bool AArch64SpeculationHardening::instrumentControlFlow(
241*06f32e7eSjoerg     MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
242*06f32e7eSjoerg   LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
243*06f32e7eSjoerg 
244*06f32e7eSjoerg   bool Modified = false;
245*06f32e7eSjoerg   MachineBasicBlock *TBB = nullptr;
246*06f32e7eSjoerg   MachineBasicBlock *FBB = nullptr;
247*06f32e7eSjoerg   AArch64CC::CondCode CondCode;
248*06f32e7eSjoerg 
249*06f32e7eSjoerg   if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
250*06f32e7eSjoerg     LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
251*06f32e7eSjoerg   } else {
252*06f32e7eSjoerg     // Now insert:
253*06f32e7eSjoerg     // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
254*06f32e7eSjoerg     // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
255*06f32e7eSjoerg     // edge.
256*06f32e7eSjoerg     AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
257*06f32e7eSjoerg 
258*06f32e7eSjoerg     MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
259*06f32e7eSjoerg     MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
260*06f32e7eSjoerg 
261*06f32e7eSjoerg     assert(SplitEdgeTBB != nullptr);
262*06f32e7eSjoerg     assert(SplitEdgeFBB != nullptr);
263*06f32e7eSjoerg 
264*06f32e7eSjoerg     DebugLoc DL;
265*06f32e7eSjoerg     if (MBB.instr_end() != MBB.instr_begin())
266*06f32e7eSjoerg       DL = (--MBB.instr_end())->getDebugLoc();
267*06f32e7eSjoerg 
268*06f32e7eSjoerg     insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
269*06f32e7eSjoerg     insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
270*06f32e7eSjoerg 
271*06f32e7eSjoerg     LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
272*06f32e7eSjoerg     LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
273*06f32e7eSjoerg     Modified = true;
274*06f32e7eSjoerg   }
275*06f32e7eSjoerg 
276*06f32e7eSjoerg   // Perform correct code generation around function calls and before returns.
277*06f32e7eSjoerg   // The below variables record the return/terminator instructions and the call
278*06f32e7eSjoerg   // instructions respectively; including which register is available as a
279*06f32e7eSjoerg   // temporary register just before the recorded instructions.
280*06f32e7eSjoerg   SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
281*06f32e7eSjoerg   SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
282*06f32e7eSjoerg   // if a temporary register is not available for at least one of the
283*06f32e7eSjoerg   // instructions for which we need to transfer taint to the stack pointer, we
284*06f32e7eSjoerg   // need to insert a full speculation barrier.
285*06f32e7eSjoerg   // TmpRegisterNotAvailableEverywhere tracks that condition.
286*06f32e7eSjoerg   bool TmpRegisterNotAvailableEverywhere = false;
287*06f32e7eSjoerg 
288*06f32e7eSjoerg   RegScavenger RS;
289*06f32e7eSjoerg   RS.enterBasicBlock(MBB);
290*06f32e7eSjoerg 
291*06f32e7eSjoerg   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
292*06f32e7eSjoerg     MachineInstr &MI = *I;
293*06f32e7eSjoerg     if (!MI.isReturn() && !MI.isCall())
294*06f32e7eSjoerg       continue;
295*06f32e7eSjoerg 
296*06f32e7eSjoerg     // The RegScavenger represents registers available *after* the MI
297*06f32e7eSjoerg     // instruction pointed to by RS.getCurrentPosition().
298*06f32e7eSjoerg     // We need to have a register that is available *before* the MI is executed.
299*06f32e7eSjoerg     if (I != MBB.begin())
300*06f32e7eSjoerg       RS.forward(std::prev(I));
301*06f32e7eSjoerg     // FIXME: The below just finds *a* unused register. Maybe code could be
302*06f32e7eSjoerg     // optimized more if this looks for the register that isn't used for the
303*06f32e7eSjoerg     // longest time around this place, to enable more scheduling freedom. Not
304*06f32e7eSjoerg     // sure if that would actually result in a big performance difference
305*06f32e7eSjoerg     // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
306*06f32e7eSjoerg     // already to do this - but it's unclear if that could easily be used here.
307*06f32e7eSjoerg     unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
308*06f32e7eSjoerg     LLVM_DEBUG(dbgs() << "RS finds "
309*06f32e7eSjoerg                       << ((TmpReg == 0) ? "no register " : "register ");
310*06f32e7eSjoerg                if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
311*06f32e7eSjoerg                dbgs() << "to be available at MI " << MI);
312*06f32e7eSjoerg     if (TmpReg == 0)
313*06f32e7eSjoerg       TmpRegisterNotAvailableEverywhere = true;
314*06f32e7eSjoerg     if (MI.isReturn())
315*06f32e7eSjoerg       ReturnInstructions.push_back({&MI, TmpReg});
316*06f32e7eSjoerg     else if (MI.isCall())
317*06f32e7eSjoerg       CallInstructions.push_back({&MI, TmpReg});
318*06f32e7eSjoerg   }
319*06f32e7eSjoerg 
320*06f32e7eSjoerg   if (TmpRegisterNotAvailableEverywhere) {
321*06f32e7eSjoerg     // When a temporary register is not available everywhere in this basic
322*06f32e7eSjoerg     // basic block where a propagate-taint-to-sp operation is needed, just
323*06f32e7eSjoerg     // emit a full speculation barrier at the start of this basic block, which
324*06f32e7eSjoerg     // renders the taint/speculation tracking in this basic block unnecessary.
325*06f32e7eSjoerg     insertFullSpeculationBarrier(MBB, MBB.begin(),
326*06f32e7eSjoerg                                  (MBB.begin())->getDebugLoc());
327*06f32e7eSjoerg     UsesFullSpeculationBarrier = true;
328*06f32e7eSjoerg     Modified = true;
329*06f32e7eSjoerg   } else {
330*06f32e7eSjoerg     for (auto MI_Reg : ReturnInstructions) {
331*06f32e7eSjoerg       assert(MI_Reg.second != 0);
332*06f32e7eSjoerg       LLVM_DEBUG(
333*06f32e7eSjoerg           dbgs()
334*06f32e7eSjoerg           << " About to insert Reg to SP taint propagation with temp register "
335*06f32e7eSjoerg           << printReg(MI_Reg.second, TRI)
336*06f32e7eSjoerg           << " on instruction: " << *MI_Reg.first);
337*06f32e7eSjoerg       insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
338*06f32e7eSjoerg       Modified = true;
339*06f32e7eSjoerg     }
340*06f32e7eSjoerg 
341*06f32e7eSjoerg     for (auto MI_Reg : CallInstructions) {
342*06f32e7eSjoerg       assert(MI_Reg.second != 0);
343*06f32e7eSjoerg       LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
344*06f32e7eSjoerg                            "propagation with temp register "
345*06f32e7eSjoerg                         << printReg(MI_Reg.second, TRI)
346*06f32e7eSjoerg                         << " around instruction: " << *MI_Reg.first);
347*06f32e7eSjoerg       // Just after the call:
348*06f32e7eSjoerg       insertSPToRegTaintPropagation(
349*06f32e7eSjoerg           MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
350*06f32e7eSjoerg       // Just before the call:
351*06f32e7eSjoerg       insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
352*06f32e7eSjoerg       Modified = true;
353*06f32e7eSjoerg     }
354*06f32e7eSjoerg   }
355*06f32e7eSjoerg   return Modified;
356*06f32e7eSjoerg }
357*06f32e7eSjoerg 
insertSPToRegTaintPropagation(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI) const358*06f32e7eSjoerg void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
359*06f32e7eSjoerg     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
360*06f32e7eSjoerg   // If full control flow speculation barriers are used, emit a control flow
361*06f32e7eSjoerg   // barrier to block potential miss-speculation in flight coming in to this
362*06f32e7eSjoerg   // function.
363*06f32e7eSjoerg   if (UseControlFlowSpeculationBarrier) {
364*06f32e7eSjoerg     insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
365*06f32e7eSjoerg     return;
366*06f32e7eSjoerg   }
367*06f32e7eSjoerg 
368*06f32e7eSjoerg   // CMP   SP, #0   === SUBS   xzr, SP, #0
369*06f32e7eSjoerg   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
370*06f32e7eSjoerg       .addDef(AArch64::XZR)
371*06f32e7eSjoerg       .addUse(AArch64::SP)
372*06f32e7eSjoerg       .addImm(0)
373*06f32e7eSjoerg       .addImm(0); // no shift
374*06f32e7eSjoerg   // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ
375*06f32e7eSjoerg   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
376*06f32e7eSjoerg       .addDef(MisspeculatingTaintReg)
377*06f32e7eSjoerg       .addUse(AArch64::XZR)
378*06f32e7eSjoerg       .addUse(AArch64::XZR)
379*06f32e7eSjoerg       .addImm(AArch64CC::EQ);
380*06f32e7eSjoerg }
381*06f32e7eSjoerg 
insertRegToSPTaintPropagation(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,unsigned TmpReg) const382*06f32e7eSjoerg void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
383*06f32e7eSjoerg     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
384*06f32e7eSjoerg     unsigned TmpReg) const {
385*06f32e7eSjoerg   // If full control flow speculation barriers are used, there will not be
386*06f32e7eSjoerg   // miss-speculation when returning from this function, and therefore, also
387*06f32e7eSjoerg   // no need to encode potential miss-speculation into the stack pointer.
388*06f32e7eSjoerg   if (UseControlFlowSpeculationBarrier)
389*06f32e7eSjoerg     return;
390*06f32e7eSjoerg 
391*06f32e7eSjoerg   // mov   Xtmp, SP  === ADD  Xtmp, SP, #0
392*06f32e7eSjoerg   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
393*06f32e7eSjoerg       .addDef(TmpReg)
394*06f32e7eSjoerg       .addUse(AArch64::SP)
395*06f32e7eSjoerg       .addImm(0)
396*06f32e7eSjoerg       .addImm(0); // no shift
397*06f32e7eSjoerg   // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
398*06f32e7eSjoerg   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
399*06f32e7eSjoerg       .addDef(TmpReg, RegState::Renamable)
400*06f32e7eSjoerg       .addUse(TmpReg, RegState::Kill | RegState::Renamable)
401*06f32e7eSjoerg       .addUse(MisspeculatingTaintReg, RegState::Kill)
402*06f32e7eSjoerg       .addImm(0);
403*06f32e7eSjoerg   // mov   SP, Xtmp === ADD SP, Xtmp, #0
404*06f32e7eSjoerg   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
405*06f32e7eSjoerg       .addDef(AArch64::SP)
406*06f32e7eSjoerg       .addUse(TmpReg, RegState::Kill)
407*06f32e7eSjoerg       .addImm(0)
408*06f32e7eSjoerg       .addImm(0); // no shift
409*06f32e7eSjoerg }
410*06f32e7eSjoerg 
functionUsesHardeningRegister(MachineFunction & MF) const411*06f32e7eSjoerg bool AArch64SpeculationHardening::functionUsesHardeningRegister(
412*06f32e7eSjoerg     MachineFunction &MF) const {
413*06f32e7eSjoerg   for (MachineBasicBlock &MBB : MF) {
414*06f32e7eSjoerg     for (MachineInstr &MI : MBB) {
415*06f32e7eSjoerg       // treat function calls specially, as the hardening register does not
416*06f32e7eSjoerg       // need to remain live across function calls.
417*06f32e7eSjoerg       if (MI.isCall())
418*06f32e7eSjoerg         continue;
419*06f32e7eSjoerg       if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
420*06f32e7eSjoerg           MI.modifiesRegister(MisspeculatingTaintReg, TRI))
421*06f32e7eSjoerg         return true;
422*06f32e7eSjoerg     }
423*06f32e7eSjoerg   }
424*06f32e7eSjoerg   return false;
425*06f32e7eSjoerg }
426*06f32e7eSjoerg 
427*06f32e7eSjoerg // Make GPR register Reg speculation-safe by putting it through the
428*06f32e7eSjoerg // SpeculationSafeValue pseudo instruction, if we can't prove that
429*06f32e7eSjoerg // the value in the register has already been hardened.
makeGPRSpeculationSafe(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineInstr & MI,unsigned Reg)430*06f32e7eSjoerg bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
431*06f32e7eSjoerg     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
432*06f32e7eSjoerg     unsigned Reg) {
433*06f32e7eSjoerg   assert(AArch64::GPR32allRegClass.contains(Reg) ||
434*06f32e7eSjoerg          AArch64::GPR64allRegClass.contains(Reg));
435*06f32e7eSjoerg 
436*06f32e7eSjoerg   // Loads cannot directly load a value into the SP (nor WSP).
437*06f32e7eSjoerg   // Therefore, if Reg is SP or WSP, it is because the instruction loads from
438*06f32e7eSjoerg   // the stack through the stack pointer.
439*06f32e7eSjoerg   //
440*06f32e7eSjoerg   // Since the stack pointer is never dynamically controllable, don't harden it.
441*06f32e7eSjoerg   if (Reg == AArch64::SP || Reg == AArch64::WSP)
442*06f32e7eSjoerg     return false;
443*06f32e7eSjoerg 
444*06f32e7eSjoerg   // Do not harden the register again if already hardened before.
445*06f32e7eSjoerg   if (RegsAlreadyMasked[Reg])
446*06f32e7eSjoerg     return false;
447*06f32e7eSjoerg 
448*06f32e7eSjoerg   const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
449*06f32e7eSjoerg   LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n");
450*06f32e7eSjoerg   BuildMI(MBB, MBBI, MI.getDebugLoc(),
451*06f32e7eSjoerg           TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
452*06f32e7eSjoerg                            : AArch64::SpeculationSafeValueW))
453*06f32e7eSjoerg       .addDef(Reg)
454*06f32e7eSjoerg       .addUse(Reg);
455*06f32e7eSjoerg   RegsAlreadyMasked.set(Reg);
456*06f32e7eSjoerg   return true;
457*06f32e7eSjoerg }
458*06f32e7eSjoerg 
slhLoads(MachineBasicBlock & MBB)459*06f32e7eSjoerg bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
460*06f32e7eSjoerg   bool Modified = false;
461*06f32e7eSjoerg 
462*06f32e7eSjoerg   LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);
463*06f32e7eSjoerg 
464*06f32e7eSjoerg   RegsAlreadyMasked.reset();
465*06f32e7eSjoerg 
466*06f32e7eSjoerg   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
467*06f32e7eSjoerg   MachineBasicBlock::iterator NextMBBI;
468*06f32e7eSjoerg   for (; MBBI != E; MBBI = NextMBBI) {
469*06f32e7eSjoerg     MachineInstr &MI = *MBBI;
470*06f32e7eSjoerg     NextMBBI = std::next(MBBI);
471*06f32e7eSjoerg     // Only harden loaded values or addresses used in loads.
472*06f32e7eSjoerg     if (!MI.mayLoad())
473*06f32e7eSjoerg       continue;
474*06f32e7eSjoerg 
475*06f32e7eSjoerg     LLVM_DEBUG(dbgs() << "About to harden: " << MI);
476*06f32e7eSjoerg 
477*06f32e7eSjoerg     // For general purpose register loads, harden the registers loaded into.
478*06f32e7eSjoerg     // For other loads, harden the address loaded from.
479*06f32e7eSjoerg     // Masking the loaded value is expected to result in less performance
480*06f32e7eSjoerg     // overhead, as the load can still execute speculatively in comparison to
481*06f32e7eSjoerg     // when the address loaded from gets masked. However, masking is only
482*06f32e7eSjoerg     // easy to do efficiently on GPR registers, so for loads into non-GPR
483*06f32e7eSjoerg     // registers (e.g. floating point loads), mask the address loaded from.
484*06f32e7eSjoerg     bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
485*06f32e7eSjoerg       return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
486*06f32e7eSjoerg                             AArch64::GPR64allRegClass.contains(Op.getReg()));
487*06f32e7eSjoerg     });
488*06f32e7eSjoerg     // FIXME: it might be a worthwhile optimization to not mask loaded
489*06f32e7eSjoerg     // values if all the registers involved in address calculation are already
490*06f32e7eSjoerg     // hardened, leading to this load not able to execute on a miss-speculated
491*06f32e7eSjoerg     // path.
492*06f32e7eSjoerg     bool HardenLoadedData = AllDefsAreGPR;
493*06f32e7eSjoerg     bool HardenAddressLoadedFrom = !HardenLoadedData;
494*06f32e7eSjoerg 
495*06f32e7eSjoerg     // First remove registers from AlreadyMaskedRegisters if their value is
496*06f32e7eSjoerg     // updated by this instruction - it makes them contain a new value that is
497*06f32e7eSjoerg     // not guaranteed to already have been masked.
498*06f32e7eSjoerg     for (MachineOperand Op : MI.defs())
499*06f32e7eSjoerg       for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
500*06f32e7eSjoerg         RegsAlreadyMasked.reset(*AI);
501*06f32e7eSjoerg 
502*06f32e7eSjoerg     // FIXME: loads from the stack with an immediate offset from the stack
503*06f32e7eSjoerg     // pointer probably shouldn't be hardened, which could result in a
504*06f32e7eSjoerg     // significant optimization. See section "Don’t check loads from
505*06f32e7eSjoerg     // compile-time constant stack offsets", in
506*06f32e7eSjoerg     // https://llvm.org/docs/SpeculativeLoadHardening.html
507*06f32e7eSjoerg 
508*06f32e7eSjoerg     if (HardenLoadedData)
509*06f32e7eSjoerg       for (auto Def : MI.defs()) {
510*06f32e7eSjoerg         if (Def.isDead())
511*06f32e7eSjoerg           // Do not mask a register that is not used further.
512*06f32e7eSjoerg           continue;
513*06f32e7eSjoerg         // FIXME: For pre/post-increment addressing modes, the base register
514*06f32e7eSjoerg         // used in address calculation is also defined by this instruction.
515*06f32e7eSjoerg         // It might be a worthwhile optimization to not harden that
516*06f32e7eSjoerg         // base register increment/decrement when the increment/decrement is
517*06f32e7eSjoerg         // an immediate.
518*06f32e7eSjoerg         Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
519*06f32e7eSjoerg       }
520*06f32e7eSjoerg 
521*06f32e7eSjoerg     if (HardenAddressLoadedFrom)
522*06f32e7eSjoerg       for (auto Use : MI.uses()) {
523*06f32e7eSjoerg         if (!Use.isReg())
524*06f32e7eSjoerg           continue;
525*06f32e7eSjoerg         Register Reg = Use.getReg();
526*06f32e7eSjoerg         // Some loads of floating point data have implicit defs/uses on a
527*06f32e7eSjoerg         // super register of that floating point data. Some examples:
528*06f32e7eSjoerg         // $s0 = LDRSui $sp, 22, implicit-def $q0
529*06f32e7eSjoerg         // $q0 = LD1i64 $q0, 1, renamable $x0
530*06f32e7eSjoerg         // We need to filter out these uses for non-GPR register which occur
531*06f32e7eSjoerg         // because the load partially fills a non-GPR register with the loaded
532*06f32e7eSjoerg         // data. Just skipping all non-GPR registers is safe (for now) as all
533*06f32e7eSjoerg         // AArch64 load instructions only use GPR registers to perform the
534*06f32e7eSjoerg         // address calculation. FIXME: However that might change once we can
535*06f32e7eSjoerg         // produce SVE gather instructions.
536*06f32e7eSjoerg         if (!(AArch64::GPR32allRegClass.contains(Reg) ||
537*06f32e7eSjoerg               AArch64::GPR64allRegClass.contains(Reg)))
538*06f32e7eSjoerg           continue;
539*06f32e7eSjoerg         Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
540*06f32e7eSjoerg       }
541*06f32e7eSjoerg   }
542*06f32e7eSjoerg   return Modified;
543*06f32e7eSjoerg }
544*06f32e7eSjoerg 
545*06f32e7eSjoerg /// \brief If MBBI references a pseudo instruction that should be expanded
546*06f32e7eSjoerg /// here, do the expansion and return true. Otherwise return false.
expandSpeculationSafeValue(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool UsesFullSpeculationBarrier)547*06f32e7eSjoerg bool AArch64SpeculationHardening::expandSpeculationSafeValue(
548*06f32e7eSjoerg     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
549*06f32e7eSjoerg     bool UsesFullSpeculationBarrier) {
550*06f32e7eSjoerg   MachineInstr &MI = *MBBI;
551*06f32e7eSjoerg   unsigned Opcode = MI.getOpcode();
552*06f32e7eSjoerg   bool Is64Bit = true;
553*06f32e7eSjoerg 
554*06f32e7eSjoerg   switch (Opcode) {
555*06f32e7eSjoerg   default:
556*06f32e7eSjoerg     break;
557*06f32e7eSjoerg   case AArch64::SpeculationSafeValueW:
558*06f32e7eSjoerg     Is64Bit = false;
559*06f32e7eSjoerg     LLVM_FALLTHROUGH;
560*06f32e7eSjoerg   case AArch64::SpeculationSafeValueX:
561*06f32e7eSjoerg     // Just remove the SpeculationSafe pseudo's if control flow
562*06f32e7eSjoerg     // miss-speculation isn't happening because we're already inserting barriers
563*06f32e7eSjoerg     // to guarantee that.
564*06f32e7eSjoerg     if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
565*06f32e7eSjoerg       Register DstReg = MI.getOperand(0).getReg();
566*06f32e7eSjoerg       Register SrcReg = MI.getOperand(1).getReg();
567*06f32e7eSjoerg       // Mark this register and all its aliasing registers as needing to be
568*06f32e7eSjoerg       // value speculation hardened before its next use, by using a CSDB
569*06f32e7eSjoerg       // barrier instruction.
570*06f32e7eSjoerg       for (MachineOperand Op : MI.defs())
571*06f32e7eSjoerg         for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
572*06f32e7eSjoerg           RegsNeedingCSDBBeforeUse.set(*AI);
573*06f32e7eSjoerg 
574*06f32e7eSjoerg       // Mask off with taint state.
575*06f32e7eSjoerg       BuildMI(MBB, MBBI, MI.getDebugLoc(),
576*06f32e7eSjoerg               Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
577*06f32e7eSjoerg           .addDef(DstReg)
578*06f32e7eSjoerg           .addUse(SrcReg, RegState::Kill)
579*06f32e7eSjoerg           .addUse(Is64Bit ? MisspeculatingTaintReg
580*06f32e7eSjoerg                           : MisspeculatingTaintReg32Bit)
581*06f32e7eSjoerg           .addImm(0);
582*06f32e7eSjoerg     }
583*06f32e7eSjoerg     MI.eraseFromParent();
584*06f32e7eSjoerg     return true;
585*06f32e7eSjoerg   }
586*06f32e7eSjoerg   return false;
587*06f32e7eSjoerg }
588*06f32e7eSjoerg 
insertCSDB(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc DL)589*06f32e7eSjoerg bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
590*06f32e7eSjoerg                                              MachineBasicBlock::iterator MBBI,
591*06f32e7eSjoerg                                              DebugLoc DL) {
592*06f32e7eSjoerg   assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
593*06f32e7eSjoerg                                               "control flow miss-speculation "
594*06f32e7eSjoerg                                               "is already blocked");
595*06f32e7eSjoerg   // insert data value speculation barrier (CSDB)
596*06f32e7eSjoerg   BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
597*06f32e7eSjoerg   RegsNeedingCSDBBeforeUse.reset();
598*06f32e7eSjoerg   return true;
599*06f32e7eSjoerg }
600*06f32e7eSjoerg 
lowerSpeculationSafeValuePseudos(MachineBasicBlock & MBB,bool UsesFullSpeculationBarrier)601*06f32e7eSjoerg bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
602*06f32e7eSjoerg     MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
603*06f32e7eSjoerg   bool Modified = false;
604*06f32e7eSjoerg 
605*06f32e7eSjoerg   RegsNeedingCSDBBeforeUse.reset();
606*06f32e7eSjoerg 
607*06f32e7eSjoerg   // The following loop iterates over all instructions in the basic block,
608*06f32e7eSjoerg   // and performs 2 operations:
609*06f32e7eSjoerg   // 1. Insert a CSDB at this location if needed.
610*06f32e7eSjoerg   // 2. Expand the SpeculationSafeValuePseudo if the current instruction is
611*06f32e7eSjoerg   // one.
612*06f32e7eSjoerg   //
613*06f32e7eSjoerg   // The insertion of the CSDB is done as late as possible (i.e. just before
614*06f32e7eSjoerg   // the use of a masked register), in the hope that that will reduce the
615*06f32e7eSjoerg   // total number of CSDBs in a block when there are multiple masked registers
616*06f32e7eSjoerg   // in the block.
617*06f32e7eSjoerg   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
618*06f32e7eSjoerg   DebugLoc DL;
619*06f32e7eSjoerg   while (MBBI != E) {
620*06f32e7eSjoerg     MachineInstr &MI = *MBBI;
621*06f32e7eSjoerg     DL = MI.getDebugLoc();
622*06f32e7eSjoerg     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
623*06f32e7eSjoerg 
624*06f32e7eSjoerg     // First check if a CSDB needs to be inserted due to earlier registers
625*06f32e7eSjoerg     // that were masked and that are used by the next instruction.
626*06f32e7eSjoerg     // Also emit the barrier on any potential control flow changes.
627*06f32e7eSjoerg     bool NeedToEmitBarrier = false;
628*06f32e7eSjoerg     if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
629*06f32e7eSjoerg       NeedToEmitBarrier = true;
630*06f32e7eSjoerg     if (!NeedToEmitBarrier)
631*06f32e7eSjoerg       for (MachineOperand Op : MI.uses())
632*06f32e7eSjoerg         if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
633*06f32e7eSjoerg           NeedToEmitBarrier = true;
634*06f32e7eSjoerg           break;
635*06f32e7eSjoerg         }
636*06f32e7eSjoerg 
637*06f32e7eSjoerg     if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
638*06f32e7eSjoerg       Modified |= insertCSDB(MBB, MBBI, DL);
639*06f32e7eSjoerg 
640*06f32e7eSjoerg     Modified |=
641*06f32e7eSjoerg         expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
642*06f32e7eSjoerg 
643*06f32e7eSjoerg     MBBI = NMBBI;
644*06f32e7eSjoerg   }
645*06f32e7eSjoerg 
646*06f32e7eSjoerg   if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
647*06f32e7eSjoerg     Modified |= insertCSDB(MBB, MBBI, DL);
648*06f32e7eSjoerg 
649*06f32e7eSjoerg   return Modified;
650*06f32e7eSjoerg }
651*06f32e7eSjoerg 
runOnMachineFunction(MachineFunction & MF)652*06f32e7eSjoerg bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
653*06f32e7eSjoerg   if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
654*06f32e7eSjoerg     return false;
655*06f32e7eSjoerg 
656*06f32e7eSjoerg   MisspeculatingTaintReg = AArch64::X16;
657*06f32e7eSjoerg   MisspeculatingTaintReg32Bit = AArch64::W16;
658*06f32e7eSjoerg   TII = MF.getSubtarget().getInstrInfo();
659*06f32e7eSjoerg   TRI = MF.getSubtarget().getRegisterInfo();
660*06f32e7eSjoerg   RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
661*06f32e7eSjoerg   RegsAlreadyMasked.resize(TRI->getNumRegs());
662*06f32e7eSjoerg   UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
663*06f32e7eSjoerg 
664*06f32e7eSjoerg   bool Modified = false;
665*06f32e7eSjoerg 
666*06f32e7eSjoerg   // Step 1: Enable automatic insertion of SpeculationSafeValue.
667*06f32e7eSjoerg   if (HardenLoads) {
668*06f32e7eSjoerg     LLVM_DEBUG(
669*06f32e7eSjoerg         dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
670*06f32e7eSjoerg                   "SpeculationSafeValue intrinsics *****\n");
671*06f32e7eSjoerg     for (auto &MBB : MF)
672*06f32e7eSjoerg       Modified |= slhLoads(MBB);
673*06f32e7eSjoerg   }
674*06f32e7eSjoerg 
675*06f32e7eSjoerg   // 2. Add instrumentation code to function entry and exits.
676*06f32e7eSjoerg   LLVM_DEBUG(
677*06f32e7eSjoerg       dbgs()
678*06f32e7eSjoerg       << "***** AArch64SpeculationHardening - track control flow *****\n");
679*06f32e7eSjoerg 
680*06f32e7eSjoerg   SmallVector<MachineBasicBlock *, 2> EntryBlocks;
681*06f32e7eSjoerg   EntryBlocks.push_back(&MF.front());
682*06f32e7eSjoerg   for (const LandingPadInfo &LPI : MF.getLandingPads())
683*06f32e7eSjoerg     EntryBlocks.push_back(LPI.LandingPadBlock);
684*06f32e7eSjoerg   for (auto Entry : EntryBlocks)
685*06f32e7eSjoerg     insertSPToRegTaintPropagation(
686*06f32e7eSjoerg         *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
687*06f32e7eSjoerg 
688*06f32e7eSjoerg   // 3. Add instrumentation code to every basic block.
689*06f32e7eSjoerg   for (auto &MBB : MF) {
690*06f32e7eSjoerg     bool UsesFullSpeculationBarrier = false;
691*06f32e7eSjoerg     Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
692*06f32e7eSjoerg     Modified |=
693*06f32e7eSjoerg         lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
694*06f32e7eSjoerg   }
695*06f32e7eSjoerg 
696*06f32e7eSjoerg   return Modified;
697*06f32e7eSjoerg }
698*06f32e7eSjoerg 
699*06f32e7eSjoerg /// \brief Returns an instance of the pseudo instruction expansion pass.
createAArch64SpeculationHardeningPass()700*06f32e7eSjoerg FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
701*06f32e7eSjoerg   return new AArch64SpeculationHardening();
702*06f32e7eSjoerg }
703