//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM implementation of the TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

#define DEBUG_TYPE "arm-frame-lowering"

using namespace llvm;

static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}

bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
  // iOS always has a FP for backtracking; force other targets to keep their FP
  // when doing FastISel. The emitted code is currently superior, and in cases
  // like the test-suite's lencod, FastISel isn't quite correct when the FP is
  // eliminated.
  return MF.getSubtarget<ARMSubtarget>().useFastISel();
}

/// Returns true if the target can safely skip saving callee-saved registers
/// for noreturn nounwind functions.
bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
         MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
         !MF.getFunction().hasFnAttribute(Attribute::UWTable));

  // The frame pointer and link register are not treated as normal CSRs, so we
  // can always skip CSR saves for non-returning functions.
  return true;
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // ABI-required frame pointer.
  if (MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;

  // Frame pointer required for use within this function.
  return (RegInfo->needsStackRealignment(MF) ||
          MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken());
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned CFSize = MFI.getMaxCallFrameSize();
  // It's not always a good idea to include the call frame as part of the
  // stack frame. ARM (especially Thumb) has small immediate offsets for
  // addressing the stack frame, so a large call frame can cause poor codegen
  // and may even make it impossible to scavenge a register.
  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
    return false;

  return !MFI.hasVarSizedObjects();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}

static void emitRegPlusImmediate(
    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}

static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}

static int sizeOfSPAdjustment(const MachineInstr &MI) {
  int RegSize;
  switch (MI.getOpcode()) {
  case ARM::VSTMDDB_UPD:
    RegSize = 8;
    break;
  case ARM::STMDB_UPD:
  case ARM::t2STMDB_UPD:
    RegSize = 4;
    break;
  case ARM::t2STR_PRE:
  case ARM::STR_PRE_IMM:
    return 4;
  default:
    llvm_unreachable("Unknown push or pop like instruction");
  }

  int count = 0;
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (plus the
  // predicate), so the register list starts at operand 4.
  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
    count += RegSize;
  return count;
}
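// Example (illustrative): a prologue "vpush {d8-d11}" is a VSTMDDB_UPD whose
// operands are (SP def, SP use, pred imm, pred reg, d8, d9, d10, d11), so the
// loop above counts 4 registers * 8 bytes = a 32-byte SP adjustment.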

static bool WindowsRequiresStackProbe(const MachineFunction &MF,
                                      size_t StackSizeInBytes) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
  if (F.hasFnAttribute("stack-probe-size"))
    F.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}

namespace {

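// Records the SP-adjusting instructions emitted while building a prologue so
// that matching .cfi_def_cfa_offset directives can be inserted once the final
// instruction sequence is known.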
struct StackAdjustingInsts {
  struct InstInfo {
    MachineBasicBlock::iterator I;
    unsigned SPAdjust;
    bool BeforeFPSet;
  };

  SmallVector<InstInfo, 4> Insts;

  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, bool HasFP) {
    MachineFunction &MF = *MBB.getParent();
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
};

} // end anonymous namespace

/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
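/// For example, on an ARM-mode v7 core, aligning Reg to 16 bytes emits the
/// single instruction "bfc Reg, #0, #4".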
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST =
      static_cast<const ARMSubtarget &>(MF.getSubtarget());
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // If the BFC instruction is available, use it to zero the lower bits:
    //   bfc Reg, #0, log2(Alignment)
    // Otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field:
    //   bic Reg, Reg, Alignment-1
    // Otherwise, emit a shift pair:
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(~AlignMask)
          .add(predOps(ARMCC::AL));
    } else if (AlignMask <= 255) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(AlignMask)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(~AlignMask)
        .add(predOps(ARMCC::AL));
  }
}

/// We need the offset of the frame pointer relative to other MachineFrameInfo
/// offsets which are encoded relative to SP at function begin.
/// See also emitPrologue() for how the FP is set up.
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
  // For Thumb1, push.w isn't available, so the first push will always push
  // r7 and lr onto the stack first.
  if (AFI.isThumb1OnlyFunction())
    return -AFI.getArgRegsSaveSize() - (2 * 4);
  // This is a conservative estimate: assume the frame pointer is r7 and that
  // pc ("r15") and the registers up to r8 get spilled before it
  // (= 8 registers).
  int FPCXTSaveSize =
      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
}

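// Illustrative sketch of the frame laid out by this prologue, from high to
// low addresses (the exact contents depend on the subtarget and on which
// callee-saved registers are used):
//
//   incoming SP -> [ vararg register save area ]
//                  [ FPCXTNS save (v8.1-M CMSE entry functions only) ]
//                  [ GPR callee-save area 1 (r0-r7, lr, ...) ]
//                  [ GPR callee-save area 2 (r8-r12, when split) ]
//                  [ alignment gap for the DPR spills, if needed ]
//                  [ DPR callee-save area ]
//                  [ locals, spill slots, and the call frame ]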
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const TargetMachine &TM = MF.getTarget();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;

  // The debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of the callee-save spill areas and record which frame
  // index belongs to which callee-save spill area.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);
    DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
  }

  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
    if (NumBytes - ArgRegsSaveSize != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI),
                                     NumBytes - ArgRegsSaveSize, true);
    }
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
    return;
  }

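  // The loop below classifies each callee-saved register into one of the
  // spill areas sketched above. Note that r8-r12 only land in area 2 when
  // STI.splitFramePushPop(MF) is true; otherwise they share area 1.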
  // Determine spill area sizes.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    int FI = CSI[i].getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12:
      if (STI.splitFramePushPop(MF)) {
        GPRCS2Size += 4;
        break;
      }
      LLVM_FALLTHROUGH;
    case ARM::R0:
    case ARM::R1:
    case ARM::R2:
    case ARM::R3:
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      GPRCS1Size += 4;
      break;
    case ARM::FPCXTNS:
      FPCXTSaveSize = 4;
      break;
    default:
      // This is a DPR. Exclude the aligned DPRCS2 spills.
      if (Reg == ARM::D8)
        D8SpillFI = FI;
      if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
        DPRCSSize += 8;
    }
  }

  // Move past the FPCXT area.
  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
  }

  // Determine the starting offsets of the spill areas.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
  unsigned DPRGapSize =
      (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
      DPRAlign.value();

  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
  int FramePtrOffsetInPush = 0;
  if (HasFP) {
    int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
    assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
           "Max FP estimation is wrong");
    FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  // Move past area 2.
  if (GPRCS2Size > 0) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  // The prolog/epilog inserter assumes we correctly align DPRs on the stack,
  // so our .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
    }
  }

  // Move past area 3.
  if (DPRCSSize > 0) {
    // Since a vpush register list cannot have gaps, there may be multiple
    // vpush instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
      LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(D8SpillFI);
  } else
    NumBytes = DPRCSOffset;

  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

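    // Note (assumption about the MSVC runtime ABI): __chkstk takes the
    // allocation size in 4-byte words in r4, probes the stack page by page,
    // and returns the byte count in r4, which the t2SUBrr emitted below
    // subtracts from SP.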
    if (NumWords < 65536)
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    else
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup);

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
          .add(predOps(ARMCC::AL))
          .addExternalSymbol("__chkstk")
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
          .add(predOps(ARMCC::AL))
          .addReg(ARM::R12, RegState::Kill)
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
        .addReg(ARM::SP, RegState::Kill)
        .addReg(ARM::R4, RegState::Kill)
        .setMIFlags(MachineInstr::FrameSetup)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      //   mov sp, r7
      //   sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of the callee-saved
      // area). The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  if (HasFP) {
    MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
    unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
    emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
                         dl, TII, FramePtr, ARM::SP,
                         PushSize + FramePtrOffsetInPush,
                         MachineInstr::FrameSetup);
    if (FramePtrOffsetInPush + PushSize != 0) {
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
          nullptr, MRI->getDwarfRegNum(FramePtr, true),
          FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
      BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF CFI instructions to describe the situation. Start by
  // recording where each register ended up:
  if (GPRCS1Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
    int CFIIndex;
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF))
          break;
        LLVM_FALLTHROUGH;
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
    }
  }

  if (GPRCS2Size > 0) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
          unsigned Offset = MFI.getObjectOffset(FI);
          unsigned CFIIndex = MF.addFrameInst(
              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
              .addCFIIndex(CFIIndex)
              .setMIFlags(MachineInstr::FrameSetup);
        }
        break;
      }
    }
  }

  if (DPRCSSize > 0) {
    // Since a vpush register list cannot have gaps, there may be multiple
    // vpush instructions in the prologue.
    MachineBasicBlock::iterator Pos = std::next(LastPush);
    for (const auto &Entry : CSI) {
      unsigned Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
          (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
        unsigned Offset = MFI.getObjectOffset(FI);
        unsigned CFIIndex = MF.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
                               false);
    } else {
      // We cannot use sp as the source/dest register here, thus we're using
      // r4 to perform the calculations. We're emitting the following sequence:
      //   mov r4, sp
      //   -- use emitAligningInstructions to produce the best sequence to
      //   -- zero out the lower bits in r4
      //   mov sp, r4
      // FIXME: It would be better just to find a spare register here.
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
          .addReg(ARM::SP, RegState::Kill)
          .add(predOps(ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
                               false);
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
          .addReg(ARM::R4, RegState::Kill)
          .add(predOps(ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    else
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}

void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves at the first (from the top) terminator instruction.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize,
                   MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to the first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(MachineInstr::FrameDestroy))
        ++MBBI;
    }

    // Move SP to the start of the FP callee-save spill area.
    NumBytes -= (ArgRegsSaveSize +
                 AFI->getFPCXTSaveAreaSize() +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on the frame pointer only if the stack frame extends
    // beyond the frame pointer stack slot, or if the target is ELF and the
    // function has an FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII,
                                  MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          //   mov sp, r7
          //   sub sp, #24
          // This is bad: if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(ARM::R4)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .add(condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MachineInstr::FrameDestroy);

    // Increment past our save areas.
    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since a vpop register list cannot have gaps, there may be multiple
      // vpop instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
                   MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
    if (AFI->getFPCXTSaveAreaSize()) MBBI++;
  }

  if (ArgRegsSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize,
                 MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             Register &FrameReg) const {
  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
}

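/// Resolve a frame index to a (base register, offset) pair given SPAdj bytes
/// of outstanding SP adjustment. On return, FrameReg is the register the slot
/// is addressed from (SP, FP, or the base pointer) and the result is the
/// offset to apply to it.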
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->needsStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
      Offset -= SPAdj;
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumbFunction()) {
      // Prefer SP to base pointer, if the offset is suitably aligned and in
      // range as the effective range of the immediate offset is bigger when
      // basing off SP.
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
  // That can happen if we forced a base pointer for a large call frame.
  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    Offset -= SPAdj;
  }
  return Offset;
}

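/// Emit push (or single pre-indexed store) instructions spilling the
/// callee-saved registers in CSI that are accepted by Func. The aligned
/// DPRCS2 registers are skipped here; they are spilled separately by
/// emitAlignedDPRCS2Spills.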
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap, bool (*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      unsigned Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(Reg))
        MBB.addLiveIn(Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      //   vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in.
      // This happens with the @llvm.returnaddress intrinsic and with arguments
      // passed in callee-saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
    });

    if (Regs.size() > 1 || StrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .setMIFlags(MIFlags)
                                    .add(predOps(ARMCC::AL));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
          .addReg(ARM::SP)
          .setMIFlags(MIFlags)
          .addImm(-4)
          .add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer
    // to higher register numbers so need to be pushed first in order to
    // preserve monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}

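/// Emit pop (or single post-indexed load) instructions restoring the
/// callee-saved registers in CSI that are accepted by Func, folding the
/// function's return into the final LDM when it is legal to load into PC.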
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool (*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      unsigned Reg = Info.getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && STI.hasV5TOps()) {
        if (MBB.succ_empty()) {
          Reg = ARM::PC;
          // Fold the return instruction into the LDM.
          DeleteRet = true;
          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
          // We 'restore' LR into PC so it is not live out of the return block:
          // Clear Restored bit.
          Info.setRestored(false);
        } else
          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      //   vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i], getDefRegState(true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          MIB.copyImplicitOps(*MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
              .addReg(ARM::SP, RegState::Define)
              .addReg(ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      MIB.add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}

/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned DNum = CSI[i].getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = CSI[i].getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addReg(ARM::SP)
      .addImm(8 * NumAlignedDPRCS2Regs)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4)
                                .add(predOps(ARMCC::AL));
  if (!isThumb)
    MIB.add(condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
        .addReg(NextReg)
        .addReg(ARM::R4)
        .addImm((NextReg - R4BaseReg) * 2)
        .add(predOps(ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");
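  // emitAlignedDPRCS2Spills emits one store for 1, 2, or 4 registers, two
  // stores for 3, 5, 6, or 8, and three stores for 7, so skip one, two, or
  // three instructions accordingly.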
1318
1319 // These switches all fall through.
1320 switch(NumAlignedDPRCS2Regs) {
1321 case 7:
1322 ++MI;
1323 assert(MI->mayStore() && "Expecting spill instruction");
1324 LLVM_FALLTHROUGH;
1325 default:
1326 ++MI;
1327 assert(MI->mayStore() && "Expecting spill instruction");
1328 LLVM_FALLTHROUGH;
1329 case 1:
1330 case 2:
1331 case 4:
1332 assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1333 ++MI;
1334 }
1335 return MI;
1336 }
1337
1338 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1339 /// starting from d8. These instructions are assumed to execute while the
1340 /// stack is still aligned, unlike the code inserted by emitPopInst.
emitAlignedDPRCS2Restores(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,unsigned NumAlignedDPRCS2Regs,ArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI)1341 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
1342 MachineBasicBlock::iterator MI,
1343 unsigned NumAlignedDPRCS2Regs,
1344 ArrayRef<CalleeSavedInfo> CSI,
1345 const TargetRegisterInfo *TRI) {
1346 MachineFunction &MF = *MBB.getParent();
1347 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1348 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1349 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1350
1351 // Find the frame index assigned to d8.
1352 int D8SpillFI = 0;
1353 for (unsigned i = 0, e = CSI.size(); i != e; ++i)
1354 if (CSI[i].getReg() == ARM::D8) {
1355 D8SpillFI = CSI[i].getFrameIdx();
1356 break;
1357 }
1358
1359 // Materialize the address of the d8 spill slot into the scratch register r4.
1360 // This can be fairly complicated if the stack frame is large, so just use
1361 // the normal frame index elimination mechanism to do it. This code runs as
1362 // the initial part of the epilog where the stack and base pointers haven't
1363 // been changed yet.
1364 bool isThumb = AFI->isThumbFunction();
1365 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1366
1367 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1368 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1369 .addFrameIndex(D8SpillFI)
1370 .addImm(0)
1371 .add(predOps(ARMCC::AL))
1372 .add(condCodeOp());
1373
1374 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
        .addReg(ARM::R4, RegState::Define)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
        .addReg(ARM::R4)
        .addImm(16)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  if (NumAlignedDPRCS2Regs)
    BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
        .addReg(ARM::R4)
        .addImm(2 * (NextReg - R4BaseReg))
        .add(predOps(ARMCC::AL));

  // The last load kills r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc =
      AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Save the non-secure floating point context.
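  // A sketch of the instruction this emits (assuming the standard v8.1-M
  // assembly printing): "vstr fpcxtns, [sp, #-4]!".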
  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(-4)
        .add(predOps(ARMCC::AL));
  }
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
               MachineInstr::FrameSetup);
  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}

bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
              NumAlignedDPRCS2Regs);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea2Register, 0);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
              &isARMArea1Register, 0);

  return true;
}

// FIXME: Make generic?
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
                                            const ARMBaseInstrInfo &TII) {
  unsigned FnSize = 0;
  for (auto &MBB : MF) {
    for (auto &MI : MBB)
      FnSize += TII.getInstSizeInBytes(MI);
  }
  if (MF.getJumpTableInfo())
    for (auto &Table : MF.getJumpTableInfo()->getJumpTables())
      FnSize += Table.MBBs.size() * 4;
  FnSize += MF.getConstantPool()->getConstants().size() * 4;
  return FnSize;
}

/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
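// For example, a vldr/vstr of a d-register (AddrMode5 below) encodes an
// 8-bit immediate scaled by 4, so it can reach at most 1020 bytes from its
// base; frames larger than the returned limit force the offset into a
// scratch register.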
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
        if (RegClass && !RegClass->contains(ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          Limit = std::min(Limit, ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit "
                           "calculation");
        }
        break; // At most one FI per instruction.
      }
    }
  }

  return Limit;
}

// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
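// For instance, four d-registers can then be saved with a single
// 16-byte-aligned store, roughly "vst1.64 {d8, d9, d10, d11}, [r4:128]!",
// letting the hardware exploit the known alignment.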
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
           MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // need spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
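  // For example, if d8, d9 and d11 are saved but d10 is not, the loop stops
  // at the hole: only d8 and d9 are counted, and d11 is spilled to the
  // standard DPRCS area.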
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}

bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // For CMSE entry functions, we want to save the FPCXT_NS immediately
  // upon function entry (resp. restore it immediately before return).
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
    return false;

  return true;
}

void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = true;
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if a Thumb2 function requires stack realignment, since it will
  // be used as a scratch register. Also spill R4 if a Thumb2 function has
  // variable-sized objects, since it's not always possible to restore sp from
  // fp in a single instruction.
  // FIXME: It would be better just to find a spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if a Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if the Thumb1 epilogue has to restore SP from FP or the
    // function requires stack alignment. We don't know for sure what the
    // stack size will be, but for this an estimate is good enough. If
    // anything changes it, it'll be a spill, which implies we've used all
    // the registers and so R4 is already used, so not marking it here is OK.
    // FIXME: It would be better just to find a spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.splitFramePushPop(MF)) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        LLVM_FALLTHROUGH;
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.splitFramePushPop(MF)) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This
    // enables the use of BL to implement far jumps.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);

  // Determine the biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
    MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
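    // For example, "str r0, [sp, #1020]" is the farthest sp-relative tSTRspi,
    // while a bp- or fp-relative tSTRi tops out at an offset of 124 bytes.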
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp- or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, *AFI);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets || !CanEliminateFrame ||
      RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (HasFP) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(LRPos);
      }
      auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is true when we inserted a spill for a callee-save GPR which is
    // not otherwise used by the function. This guarantees it is possible
    // to scavenge a register to hold the address of a stack slot. On Thumb1,
    // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
    // subtargets, this is any GPR, i.e. r4-r11 or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    bool ExtraCSSpill = false;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions cannot
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.
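      // A hypothetical worked example: a function whose four argument
      // registers are all live-in, returning one value and saving r8/r9,
      // has EntryRegDeficit = 0 and ExitRegDeficit = 1 - 4 = -3, so
      // RegDeficit starts at 0 and rises to 2 after the two high registers
      // are counted; pushing two extra registers from AvailableRegs then
      // brings it back down to 0.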

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue
      // for free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first
      // push instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP) {
        if (SavedRegs.test(ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
          ExtraCSSpill = true;
        UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is, spill
    // LR as well so we can fold BX_RET into the register restore (LDM).
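    // (For example, this allows "pop {r4, r5, r7, pc}" instead of
    // "pop {r4, r5, r7}" followed by "bx lr".)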
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      auto LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        ExtraCSSpill = true;
    }

    // If the stack and doubles are 8-byte aligned and we are spilling an odd
    // number of GPRs, spill one extra callee-save GPR so we won't have to pad
    // between the integer and double callee-save areas.
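    // For example, with r4, r5 and r7 spilled (12 bytes), the D-register area
    // below would need a 4-byte pad word; spilling one more GPR keeps it
    // 8-byte aligned instead.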
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
          unsigned Reg = UnspilledCS1GPRs[i];
          // Don't spill a high register if the function is Thumb. In the case
          // of Windows on ARM, accept R11 (the frame pointer).
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              ExtraCSSpill = true;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
          ExtraCSSpill = true;
      }
    }

    // Estimate if we might need to scavenge a register at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments as well, even when the frame itself is small.
    if (BigFrameOffsets && !ExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.back();
        UnspilledCS1GPRs.pop_back();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers.
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.back();
          UnspilledCS2GPRs.pop_back();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(Reg))
            ExtraCSSpill = true;
        }
      }
      if (!ExtraCSSpill && RS) {
        // Reserve a slot closest to SP or the frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            MFI.CreateStackObject(Size, Alignment, false));
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}

void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
                                      BitVector &SavedRegs) const {
  TargetFrameLowering::getCalleeSaves(MF, SavedRegs);

  // If we have the "returned" parameter attribute which guarantees that we
  // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
  // record that fact for IPRA.
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->getPreservesR0())
    SavedRegs.set(ARM::R0);
}

bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(ARM::FPCXTNS);
    CSI.back().setRestored(false);
  }

  return false;
}

const TargetFrameLowering::SpillSlot *
ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
  NumEntries = array_lengthof(FixedSpillOffsets);
  return FixedSpillOffsets;
}

MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  if (!hasReservedCallFrame(MF)) {
    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub sp, sp, amount
    // ADJCALLSTACKUP   -> add sp, sp, amount
    MachineInstr &Old = *I;
    DebugLoc dl = Old.getDebugLoc();
    unsigned Amount = TII.getFrameSize(Old);
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      assert(!AFI->isThumb1OnlyFunction() &&
             "This eliminateCallFramePseudoInstr does not support Thumb1!");
      bool isARM = !AFI->isThumbFunction();

      // Replace the pseudo instruction with a new instruction...
      unsigned Opc = Old.getOpcode();
      int PIdx = Old.findFirstPredOperandIdx();
      ARMCC::CondCodes Pred =
          (PIdx == -1) ? ARMCC::AL
                       : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
      unsigned PredReg = TII.getFramePred(Old);
      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  }
  return MBB.erase(I);
}

/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
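/// For example (worked by hand): alignToARMConstant(0x1234) returns 0x1240,
/// the smallest value >= 0x1234 that is expressible as an 8-bit immediate
/// (0x49) rotated into place (0x49 << 6).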
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  if (Value & 0x00000100)
    Value = Value & 0x000001FC;

  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}

// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is somewhat arbitrarily chosen, but
// must be well defined in order to allow for consistent implementations of the
// __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like (when calling __morestack)
//
// * r4 holds the minimum stack size requested for this function call
// * r5 holds the stack size of the arguments to the function
// * the beginning of the function is 3 instructions after the call to
//   __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on to the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
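//
// An illustrative sketch of the check emitted below (ARM mode; the Thumb1
// path instead loads the limit via a __STACK_LIMIT constant-pool entry):
//   push {r4, r5}
//   sub  r5, sp, #AlignedStackSize  @ (just "mov r5, sp" for small frames)
//   mrc  p15, #0, r4, c13, c0, #3   @ TLS base address
//   ldr  r4, [r4, #4 * TlsOffset]   @ cached stack limit
//   cmp  r4, r5
//   blo  .LPostStack                @ enough stack; skip __morestack
//   mov  r4, #AlignedStackSize      @ __morestack arguments
//   mov  r5, #ArgumentStackSize
//   push {lr}
//   bl   __morestack
//   pop  {lr}
//   pop  {r4, r5}
//   bx   lr
// .LPostStack:
//   pop  {r4, r5}                   @ fall through into the real prologue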
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();

  // Sadly, this currently doesn't support varargs or platforms other than
  // android/linux. Note that Thumb1/Thumb2 are supported for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  uint64_t StackSize = MFI.getStackSize();

  // Do not generate a prologue for leaf functions with a stack of size zero.
  // For non-leaf functions we have to allow for the possibility that the
  // call is to a non-split function, as in PR37807. This function could also
  // take the address of a non-split function. When the linker tries to adjust
  // its non-existent prologue, it would fail with an error. Mark the object
  // file so that such failures are not errors. See this Go language bug-report
  // https://go-review.googlesource.com/c/go/+/148819/
  if (StackSize == 0 && !MFI.hasTailCall()) {
    MF.getMMI().setHasNosplitStack(true);
    return;
  }

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add.
    MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // The required stack size, aligned to the ARM constant criterion.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee-save registers as scratch registers, so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold the stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers).
  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // mov SR1, sp
  if (Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
        .add(condCodeOp())
        .addReg(ScratchReg1)
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL));
  } else if (!CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
        .addReg(ARM::SP)
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  if (Thumb && ST->isThumb1Only()) {
    unsigned PCLabelId = ARMFI->createPICLabelUId();
    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
        MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
    MachineConstantPool *MCP = MF.getConstantPool();
    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

    // ldr SR0, [pc, offset(STACK_LIMIT)]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
        .addConstantPoolIndex(CPI)
        .add(predOps(ARMCC::AL));

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
        .add(predOps(ARMCC::AL));
  } else {
    // Get the TLS base address from the coprocessor.
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));

    // Use the last TLS slot on android and a private field of the TCB on
    // linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset.
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
  }

  // Compare the stack limit with the stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)
      .add(predOps(ARMCC::AL));

  // This jump is taken if StackLimit < SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
      .addImm(ARMCC::LO)
      .addReg(ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0 (r4)
  // and the size of the stack arguments is in SR1 (r5).

  // Pass the first argument to __morestack in Scratch Register #0:
  // the amount of stack required.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
        .add(condCodeOp())
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL));
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }
  // Pass the second argument to __morestack in Scratch Register #1:
  // the amount of stack consumed to save function arguments.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
        .add(condCodeOp())
        .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
        .add(predOps(ARMCC::AL));
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
        .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  // push {lr} - Save the return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register.
  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
      nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .add(predOps(ARMCC::AL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore the return address of the original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .add(predOps(ARMCC::AL))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    } else {
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
          .add(predOps(ARMCC::AL));
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip the PostStackMBB block, so we need to restore
  // the scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Return from this function.
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));

  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped.
  CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Tell debuggers that r4 and r5 are now the same as they were in the
  // previous function, that they're the "Same Value".
  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
  CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  // Organize the MBB lists.
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}