//===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The ShadowCallStack pass instruments function prologs/epilogs to check that
// the return address has not been corrupted during the execution of the
// function. The return address is stored in a 'shadow call stack' addressed
// using the %gs segment register.
//
//===----------------------------------------------------------------------===//

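// An illustrative sketch of the instrumentation emitted for a non-leaf
// function (the authoritative sequences are the ones built in addProlog and
// addEpilog below):
//
//   prolog:
//     mov r10, [rsp]           ; load the incoming return address
//     xor r11, r11
//     add qword [gs:r11], 8    ; bump the shadow stack offset at [gs:0]
//     mov r11, [gs:r11]        ; load the new offset
//     mov [gs:r11], r10        ; save the return address on the shadow stack
//
//   epilog:
//     xor r11, r11
//     mov r10, [gs:r11]        ; load the shadow stack offset
//     mov r10, [gs:r10]        ; load the saved return address
//     sub qword [gs:r11], 8    ; pop the shadow stack
//     cmp [rsp], r10           ; check against the on-stack return address
//     jne trap
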
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

#include <bitset>

using namespace llvm;

namespace {

class ShadowCallStack : public MachineFunctionPass {
public:
  static char ID;

  ShadowCallStack() : MachineFunctionPass(ID) {
    initializeShadowCallStackPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

private:
  // Do not instrument leaf functions with this many or fewer instructions. The
  // instrumented prolog/epilog is slightly racy in the way it reads back and
  // checks the saved return address, so it is better not to instrument
  // functions that have fewer instructions than the instrumented prolog and
  // epilog themselves.
  static const size_t SkipLeafInstructions = 3;
};

char ShadowCallStack::ID = 0;
} // end anonymous namespace.

static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
                      MachineBasicBlock &MBB, const DebugLoc &DL);
static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
                          MachineBasicBlock &MBB, const DebugLoc &DL,
                          MCPhysReg FreeRegister);

static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                      MachineInstr &MI, MachineBasicBlock &TrapBB);
static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                          MachineInstr &MI, MachineBasicBlock &TrapBB,
                          MCPhysReg FreeRegister);
// Generate a longer epilog that only uses r10, for the case where a tail call
// branches through r11.
static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                             MachineInstr &MI, MachineBasicBlock &TrapBB);

// Helper function to add ModR/M references for [Seg: Reg + Offset] memory
// accesses.
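// The five operands appended below follow X86's standard memory-reference
// operand order: base register, scale, index register, displacement and
// segment register. Here the base is Reg, the scale is 1, there is no index
// register, the displacement is Offset and the segment is Seg.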
static inline const MachineInstrBuilder &
addSegmentedMem(const MachineInstrBuilder &MIB, MCPhysReg Seg, MCPhysReg Reg,
                int Offset = 0) {
  return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg);
}

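// The shadow call stack lives in the %gs segment: [gs:0] holds the byte
// offset of the current top of the shadow stack within the segment, and each
// 8-byte slot above it holds a saved return address. The prolog below pushes
// the incoming return address onto the shadow stack; the matching epilogs pop
// it and compare it against the return address on the regular stack.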
static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
                      MachineBasicBlock &MBB, const DebugLoc &DL) {
  const MCPhysReg ReturnReg = X86::R10;
  const MCPhysReg OffsetReg = X86::R11;

  auto MBBI = MBB.begin();
  // mov r10, [rsp]
  addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(ReturnReg),
               X86::RSP);
  // xor r11, r11
  BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr))
      .addDef(OffsetReg)
      .addReg(OffsetReg, RegState::Undef)
      .addReg(OffsetReg, RegState::Undef);
  // add QWORD [gs:r11], 8
  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS,
                  OffsetReg)
      .addImm(8);
  // mov r11, [gs:r11]
  addSegmentedMem(
      BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS,
      OffsetReg);
  // mov [gs:r11], r10
  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS,
                  OffsetReg)
      .addReg(ReturnReg);
}

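// Leaf-function variant of the prolog: instead of pushing onto the shadow
// call stack, stash the incoming return address in FreeRegister, a
// caller-saved register that runOnMachineFunction found to be unused
// throughout the function.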
static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
                          MachineBasicBlock &MBB, const DebugLoc &DL,
                          MCPhysReg FreeRegister) {
  // mov REG, [rsp]
  addDirectMem(BuildMI(MBB, MBB.begin(), DL, TII->get(X86::MOV64rm))
                   .addDef(FreeRegister),
               X86::RSP);
}

static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                      MachineInstr &MI, MachineBasicBlock &TrapBB) {
  const DebugLoc &DL = MI.getDebugLoc();

  // xor r11, r11
  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
      .addDef(X86::R11)
      .addReg(X86::R11, RegState::Undef)
      .addReg(X86::R11, RegState::Undef);
  // mov r10, [gs:r11]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R11);
  // mov r10, [gs:r10]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R10);
  // sub QWORD [gs:r11], 8
  // This instruction should not be moved up; it is placed here to avoid a
  // signal race.
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)),
                  X86::GS, X86::R11)
      .addImm(8);
  // cmp [rsp], r10
  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
      .addReg(X86::R10);
  // jne trap
  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
  MBB.addSuccessor(&TrapBB);
}

static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                          MachineInstr &MI, MachineBasicBlock &TrapBB,
                          MCPhysReg FreeRegister) {
  const DebugLoc &DL = MI.getDebugLoc();

  // cmp [rsp], REG
  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
      .addReg(FreeRegister);
  // jne trap
  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
  MBB.addSuccessor(&TrapBB);
}

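// Variant of addEpilog used when the return is a tail call through r11: r11
// holds the call target, so only r10 is available as scratch. The shadow
// stack offset is loaded through a zeroed r10, and the pop decrements [gs:0]
// using an absolute displacement instead of a register.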
static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                             MachineInstr &MI, MachineBasicBlock &TrapBB) {
  const DebugLoc &DL = MI.getDebugLoc();

  // xor r10, r10
  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
      .addDef(X86::R10)
      .addReg(X86::R10, RegState::Undef)
      .addReg(X86::R10, RegState::Undef);
  // mov r10, [gs:r10]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R10);
  // mov r10, [gs:r10]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R10);
  // sub QWORD [gs:0], 8
  // This instruction should not be moved up; it is placed here to avoid a
  // signal race.
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0)
      .addImm(8);
  // cmp [rsp], r10
  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
      .addReg(X86::R10);
  // jne trap
  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
  MBB.addSuccessor(&TrapBB);
}

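// Decide whether and how to instrument the function, then insert the prolog
// at the top of the entry block and an epilog (with a shared trap block for
// failed checks) before every return.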
bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) {
  if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) ||
      Fn.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  if (Fn.empty() || !Fn.getRegInfo().tracksLiveness())
    return false;

  // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live
  // on entry for parameters with the nest attribute.)
  if (Fn.front().isLiveIn(X86::R10) || Fn.front().isLiveIn(X86::R11))
    return false;

  // FIXME: For now, skip functions with conditional tail calls or tail calls
  // through r10.
  bool HasReturn = false;
  for (auto &MBB : Fn) {
    if (MBB.empty())
      continue;

    const MachineInstr &MI = MBB.instr_back();
    if (MI.isReturn())
      HasReturn = true;

    if (MI.isReturn() && MI.isCall()) {
      if (MI.findRegisterUseOperand(X86::EFLAGS))
        return false;
      // This should only be possible on Windows 64 (see GR64_TC versus
      // GR64_TCW64.)
      if (MI.findRegisterUseOperand(X86::R10) ||
          MI.hasRegisterImplicitUseOperand(X86::R10))
        return false;
    }
  }

  if (!HasReturn)
    return false;

  // For leaf functions:
  // 1. Do not instrument very short functions where it would not improve that
  //    function's security.
  // 2. Detect if there is an unused caller-saved register we can reserve to
  //    hold the return address instead of writing/reading it from the shadow
  //    call stack.
  MCPhysReg LeafFuncRegister = X86::NoRegister;
  if (!Fn.getFrameInfo().adjustsStack()) {
    size_t InstructionCount = 0;
    std::bitset<X86::NUM_TARGET_REGS> UsedRegs;
    for (auto &MBB : Fn) {
      for (auto &LiveIn : MBB.liveins())
        UsedRegs.set(LiveIn.PhysReg);
      for (auto &MI : MBB) {
        if (!MI.isDebugValue() && !MI.isCFIInstruction() && !MI.isLabel())
          InstructionCount++;
        for (auto &Op : MI.operands())
          if (Op.isReg() && Op.isDef())
            UsedRegs.set(Op.getReg());
      }
    }

    if (InstructionCount <= SkipLeafInstructions)
      return false;

    std::bitset<X86::NUM_TARGET_REGS> CalleeSavedRegs;
    const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs();
    for (size_t i = 0; CSRegs[i]; i++)
      CalleeSavedRegs.set(CSRegs[i]);

    const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
    for (auto &Reg : X86::GR64_NOSPRegClass.getRegisters()) {
      // FIXME: Optimization opportunity: spill/restore a callee-saved register
      // if a caller-saved register is unavailable.
      if (CalleeSavedRegs.test(Reg))
        continue;

      bool Used = false;
      for (MCSubRegIterator SR(Reg, TRI, true); SR.isValid(); ++SR)
        if ((Used = UsedRegs.test(*SR)))
          break;

      if (!Used) {
        LeafFuncRegister = Reg;
        break;
      }
    }
  }

  const bool LeafFuncOptimization = LeafFuncRegister != X86::NoRegister;
  if (LeafFuncOptimization)
    // Mark the leaf function register live-in for all MBBs except the entry MBB
    for (auto I = ++Fn.begin(), E = Fn.end(); I != E; ++I)
      I->addLiveIn(LeafFuncRegister);

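  // Insert the prolog at the top of the entry block, taking the debug location
  // from the entry block's first instruction (or from its fall-through block's
  // first instruction if the entry block is empty).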
  MachineBasicBlock &MBB = Fn.front();
  const MachineBasicBlock *NonEmpty = MBB.empty() ? MBB.getFallThrough() : &MBB;
  const DebugLoc &DL = NonEmpty->front().getDebugLoc();

  const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
  if (LeafFuncOptimization)
    addPrologLeaf(Fn, TII, MBB, DL, LeafFuncRegister);
  else
    addProlog(Fn, TII, MBB, DL);

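  // Add an epilog before every return, creating a single shared trap block on
  // demand for failed return-address checks.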
  MachineBasicBlock *Trap = nullptr;
  for (auto &MBB : Fn) {
    if (MBB.empty())
      continue;

    MachineInstr &MI = MBB.instr_back();
    if (MI.isReturn()) {
      if (!Trap) {
        Trap = Fn.CreateMachineBasicBlock();
        BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP));
        Fn.push_back(Trap);
      }

      if (LeafFuncOptimization)
        addEpilogLeaf(TII, MBB, MI, *Trap, LeafFuncRegister);
      else if (MI.findRegisterUseOperand(X86::R11))
        addEpilogOnlyR10(TII, MBB, MI, *Trap);
      else
        addEpilog(TII, MBB, MI, *Trap);
    }
  }

  return true;
}

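// Register the pass with LLVM's pass registry under the command-line name
// "shadow-call-stack"; createShadowCallStackPass() below is the factory the
// X86 backend uses to create it.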
INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack",
                false, false)

FunctionPass *llvm::createShadowCallStackPass() {
  return new ShadowCallStack();
}