1 //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the Thumb1 implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "Thumb1FrameLowering.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMMachineFunctionInfo.h"
17 #include "ARMSubtarget.h"
18 #include "Thumb1InstrInfo.h"
19 #include "ThumbRegisterInfo.h"
20 #include "Utils/ARMBaseInfo.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineModuleInfo.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/CodeGen/TargetOpcodes.h"
35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
36 #include "llvm/IR/DebugLoc.h"
37 #include "llvm/MC/MCContext.h"
38 #include "llvm/MC/MCDwarf.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/Support/Compiler.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/MathExtras.h"
43 #include <bitset>
44 #include <cassert>
45 #include <iterator>
46 #include <vector>
47 
48 using namespace llvm;
49 
50 Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
51     : ARMFrameLowering(sti) {}
52 
53 bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
54   const MachineFrameInfo &MFI = MF.getFrameInfo();
55   unsigned CFSize = MFI.getMaxCallFrameSize();
56   // It's not always a good idea to include the call frame as part of the
57   // stack frame. ARM (especially Thumb) has small immediate offset to
58   // address the stack frame. So a large call frame can cause poor codegen
59   // and may even makes it impossible to scavenge a register.
60   if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
61     return false;
62 
63   return !MFI.hasVarSizedObjects();
64 }
65 
66 static void
67 emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
68                              MachineBasicBlock::iterator &MBBI,
69                              const TargetInstrInfo &TII, const DebugLoc &dl,
70                              const ThumbRegisterInfo &MRI, int NumBytes,
71                              unsigned ScratchReg, unsigned MIFlags) {
72   // If it would take more than three instructions to adjust the stack pointer
73   // using tADDspi/tSUBspi, load an immediate instead.
74   if (std::abs(NumBytes) > 508 * 3) {
75     // We use a different codepath here from the normal
76     // emitThumbRegPlusImmediate so we don't have to deal with register
77     // scavenging. (Scavenging could try to use the emergency spill slot
78     // before we've actually finished setting up the stack.)
79     if (ScratchReg == ARM::NoRegister)
80       report_fatal_error("Failed to emit Thumb1 stack adjustment");
81     MachineFunction &MF = *MBB.getParent();
82     const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
83     if (ST.genExecuteOnly()) {
84       unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
85       BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg)
86           .addImm(NumBytes).setMIFlags(MIFlags);
87     } else {
88       MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL,
89                             0, MIFlags);
90     }
91     BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP)
92         .addReg(ARM::SP)
93         .addReg(ScratchReg, RegState::Kill)
94         .add(predOps(ARMCC::AL))
95         .setMIFlags(MIFlags);
96     return;
97   }
98   // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
99   // won't change.
100   emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
101                             MRI, MIFlags);
102 
103 }
104 
105 static void emitCallSPUpdate(MachineBasicBlock &MBB,
106                              MachineBasicBlock::iterator &MBBI,
107                              const TargetInstrInfo &TII, const DebugLoc &dl,
108                              const ThumbRegisterInfo &MRI, int NumBytes,
109                              unsigned MIFlags = MachineInstr::NoFlags) {
110   emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
111                             MRI, MIFlags);
112 }
113 
114 
115 MachineBasicBlock::iterator Thumb1FrameLowering::
116 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
117                               MachineBasicBlock::iterator I) const {
118   const Thumb1InstrInfo &TII =
119       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
120   const ThumbRegisterInfo *RegInfo =
121       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
122   if (!hasReservedCallFrame(MF)) {
123     // If we have alloca, convert as follows:
124     // ADJCALLSTACKDOWN -> sub, sp, sp, amount
125     // ADJCALLSTACKUP   -> add, sp, sp, amount
126     MachineInstr &Old = *I;
127     DebugLoc dl = Old.getDebugLoc();
128     unsigned Amount = TII.getFrameSize(Old);
129     if (Amount != 0) {
130       // We need to keep the stack aligned properly.  To do this, we round the
131       // amount of space needed for the outgoing arguments up to the next
132       // alignment boundary.
133       Amount = alignTo(Amount, getStackAlign());
134 
135       // Replace the pseudo instruction with a new instruction...
136       unsigned Opc = Old.getOpcode();
137       if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
138         emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
139       } else {
140         assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
141         emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
142       }
143     }
144   }
145   return MBB.erase(I);
146 }
147 
148 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
149                                        MachineBasicBlock &MBB) const {
150   MachineBasicBlock::iterator MBBI = MBB.begin();
151   MachineFrameInfo &MFI = MF.getFrameInfo();
152   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
153   MachineModuleInfo &MMI = MF.getMMI();
154   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
155   const ThumbRegisterInfo *RegInfo =
156       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
157   const Thumb1InstrInfo &TII =
158       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
159 
160   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
161   unsigned NumBytes = MFI.getStackSize();
162   assert(NumBytes >= ArgRegsSaveSize &&
163          "ArgRegsSaveSize is included in NumBytes");
164   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
165 
166   // Debug location must be unknown since the first debug location is used
167   // to determine the end of the prologue.
168   DebugLoc dl;
169 
170   Register FramePtr = RegInfo->getFrameRegister(MF);
171   Register BasePtr = RegInfo->getBaseRegister();
172   int CFAOffset = 0;
173 
174   // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
175   NumBytes = (NumBytes + 3) & ~3;
176   MFI.setStackSize(NumBytes);
177 
178   // Determine the sizes of each callee-save spill areas and record which frame
179   // belongs to which callee-save spill areas.
180   unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
181   int FramePtrSpillFI = 0;
182 
183   if (ArgRegsSaveSize) {
184     emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
185                                  ARM::NoRegister, MachineInstr::FrameSetup);
186     CFAOffset += ArgRegsSaveSize;
187     unsigned CFIIndex =
188         MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
189     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
190         .addCFIIndex(CFIIndex)
191         .setMIFlags(MachineInstr::FrameSetup);
192   }
193 
194   if (!AFI->hasStackFrame()) {
195     if (NumBytes - ArgRegsSaveSize != 0) {
196       emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
197                                    -(NumBytes - ArgRegsSaveSize),
198                                    ARM::NoRegister, MachineInstr::FrameSetup);
199       CFAOffset += NumBytes - ArgRegsSaveSize;
200       unsigned CFIIndex = MF.addFrameInst(
201           MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
202       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
203           .addCFIIndex(CFIIndex)
204           .setMIFlags(MachineInstr::FrameSetup);
205     }
206     return;
207   }
208 
209   bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr);
210 
211   for (const CalleeSavedInfo &I : CSI) {
212     Register Reg = I.getReg();
213     int FI = I.getFrameIdx();
214     if (Reg == FramePtr)
215       FramePtrSpillFI = FI;
216     switch (Reg) {
217     case ARM::R11:
218       if (HasFrameRecordArea) {
219         FRSize += 4;
220         break;
221       }
222       [[fallthrough]];
223     case ARM::R8:
224     case ARM::R9:
225     case ARM::R10:
226       if (STI.splitFramePushPop(MF)) {
227         GPRCS2Size += 4;
228         break;
229       }
230       [[fallthrough]];
231     case ARM::LR:
232       if (HasFrameRecordArea) {
233         FRSize += 4;
234         break;
235       }
236       [[fallthrough]];
237     case ARM::R4:
238     case ARM::R5:
239     case ARM::R6:
240     case ARM::R7:
241       GPRCS1Size += 4;
242       break;
243     default:
244       DPRCSSize += 8;
245     }
246   }
247 
248   MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push;
249   if (HasFrameRecordArea) {
250     // Skip Frame Record setup:
251     //   push {lr}
252     //   mov lr, r11
253     //   push {lr}
254     std::advance(MBBI, 2);
255     FRPush = MBBI++;
256   }
257 
258   if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
259     GPRCS1Push = MBBI;
260     ++MBBI;
261   }
262 
263   // Find last push instruction for GPRCS2 - spilling of high registers
264   // (r8-r11) could consist of multiple tPUSH and tMOVr instructions.
265   while (true) {
266     MachineBasicBlock::iterator OldMBBI = MBBI;
267     // Skip a run of tMOVr instructions
268     while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr &&
269            MBBI->getFlag(MachineInstr::FrameSetup))
270       MBBI++;
271     if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH &&
272         MBBI->getFlag(MachineInstr::FrameSetup)) {
273       GPRCS2Push = MBBI;
274       MBBI++;
275     } else {
276       // We have reached an instruction which is not a push, so the previous
277       // run of tMOVr instructions (which may have been empty) was not part of
278       // the prologue. Reset MBBI back to the last PUSH of the prologue.
279       MBBI = OldMBBI;
280       break;
281     }
282   }
283 
284   // Determine starting offsets of spill areas.
285   unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize -
286                          (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
287   unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
288   unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
289   bool HasFP = hasFP(MF);
290   if (HasFP)
291     AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
292                                 NumBytes);
293   if (HasFrameRecordArea)
294     AFI->setFrameRecordSavedAreaSize(FRSize);
295   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
296   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
297   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
298   NumBytes = DPRCSOffset;
299 
300   int FramePtrOffsetInBlock = 0;
301   unsigned adjustedGPRCS1Size = GPRCS1Size;
302   if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
303       tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) {
304     FramePtrOffsetInBlock = NumBytes;
305     adjustedGPRCS1Size += NumBytes;
306     NumBytes = 0;
307   }
308   CFAOffset += adjustedGPRCS1Size;
309 
310   // Adjust FP so it point to the stack slot that contains the previous FP.
311   if (HasFP) {
312     MachineBasicBlock::iterator AfterPush =
313         HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push);
314     if (HasFrameRecordArea) {
315       // We have just finished pushing the previous FP into the stack,
316       // so simply capture the SP value as the new Frame Pointer.
317       BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr)
318           .addReg(ARM::SP)
319           .setMIFlags(MachineInstr::FrameSetup)
320           .add(predOps(ARMCC::AL));
321     } else {
322       FramePtrOffsetInBlock +=
323           MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
324       BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr)
325           .addReg(ARM::SP)
326           .addImm(FramePtrOffsetInBlock / 4)
327           .setMIFlags(MachineInstr::FrameSetup)
328           .add(predOps(ARMCC::AL));
329     }
330 
331     if(FramePtrOffsetInBlock) {
332       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
333           nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock)));
334       BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
335           .addCFIIndex(CFIIndex)
336           .setMIFlags(MachineInstr::FrameSetup);
337     } else {
338       unsigned CFIIndex =
339           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
340               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
341       BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
342           .addCFIIndex(CFIIndex)
343           .setMIFlags(MachineInstr::FrameSetup);
344     }
345     if (NumBytes > 508)
346       // If offset is > 508 then sp cannot be adjusted in a single instruction,
347       // try restoring from fp instead.
348       AFI->setShouldRestoreSPFromFP(true);
349   }
350 
351   // Emit call frame information for the callee-saved low registers.
352   if (GPRCS1Size > 0) {
353     MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
354     if (adjustedGPRCS1Size) {
355       unsigned CFIIndex =
356           MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
357       BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
358           .addCFIIndex(CFIIndex)
359           .setMIFlags(MachineInstr::FrameSetup);
360     }
361     for (const CalleeSavedInfo &I : CSI) {
362       Register Reg = I.getReg();
363       int FI = I.getFrameIdx();
364       switch (Reg) {
365       case ARM::R8:
366       case ARM::R9:
367       case ARM::R10:
368       case ARM::R11:
369       case ARM::R12:
370         if (STI.splitFramePushPop(MF))
371           break;
372         [[fallthrough]];
373       case ARM::R0:
374       case ARM::R1:
375       case ARM::R2:
376       case ARM::R3:
377       case ARM::R4:
378       case ARM::R5:
379       case ARM::R6:
380       case ARM::R7:
381       case ARM::LR:
382         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
383             nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
384         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
385             .addCFIIndex(CFIIndex)
386             .setMIFlags(MachineInstr::FrameSetup);
387         break;
388       }
389     }
390   }
391 
392   // Emit call frame information for the callee-saved high registers.
393   if (GPRCS2Size > 0) {
394     MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
395     for (auto &I : CSI) {
396       Register Reg = I.getReg();
397       int FI = I.getFrameIdx();
398       switch (Reg) {
399       case ARM::R8:
400       case ARM::R9:
401       case ARM::R10:
402       case ARM::R11:
403       case ARM::R12: {
404         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
405             nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
406         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
407             .addCFIIndex(CFIIndex)
408             .setMIFlags(MachineInstr::FrameSetup);
409         break;
410       }
411       default:
412         break;
413       }
414     }
415   }
416 
417   if (NumBytes) {
418     // Insert it after all the callee-save spills.
419     //
420     // For a large stack frame, we might need a scratch register to store
421     // the size of the frame.  We know all callee-save registers are free
422     // at this point in the prologue, so pick one.
423     unsigned ScratchRegister = ARM::NoRegister;
424     for (auto &I : CSI) {
425       Register Reg = I.getReg();
426       if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
427         ScratchRegister = Reg;
428         break;
429       }
430     }
431     emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
432                                  ScratchRegister, MachineInstr::FrameSetup);
433     if (!HasFP) {
434       CFAOffset += NumBytes;
435       unsigned CFIIndex = MF.addFrameInst(
436           MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
437       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
438           .addCFIIndex(CFIIndex)
439           .setMIFlags(MachineInstr::FrameSetup);
440     }
441   }
442 
443   if (STI.isTargetELF() && HasFP)
444     MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
445                             AFI->getFramePtrSpillOffset());
446 
447   AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
448   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
449   AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
450 
451   if (RegInfo->hasStackRealignment(MF)) {
452     const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
453     // Emit the following sequence, using R4 as a temporary, since we cannot use
454     // SP as a source or destination register for the shifts:
455     // mov  r4, sp
456     // lsrs r4, r4, #NrBitsToZero
457     // lsls r4, r4, #NrBitsToZero
458     // mov  sp, r4
459     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
460       .addReg(ARM::SP, RegState::Kill)
461       .add(predOps(ARMCC::AL));
462 
463     BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
464       .addDef(ARM::CPSR)
465       .addReg(ARM::R4, RegState::Kill)
466       .addImm(NrBitsToZero)
467       .add(predOps(ARMCC::AL));
468 
469     BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
470       .addDef(ARM::CPSR)
471       .addReg(ARM::R4, RegState::Kill)
472       .addImm(NrBitsToZero)
473       .add(predOps(ARMCC::AL));
474 
475     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
476       .addReg(ARM::R4, RegState::Kill)
477       .add(predOps(ARMCC::AL));
478 
479     AFI->setShouldRestoreSPFromFP(true);
480   }
481 
482   // If we need a base pointer, set it up here. It's whatever the value
483   // of the stack pointer is at this point. Any variable size objects
484   // will be allocated after this, so we can still use the base pointer
485   // to reference locals.
486   if (RegInfo->hasBasePointer(MF))
487     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
488         .addReg(ARM::SP)
489         .add(predOps(ARMCC::AL));
490 
491   // If the frame has variable sized objects then the epilogue must restore
492   // the sp from fp. We can assume there's an FP here since hasFP already
493   // checks for hasVarSizedObjects.
494   if (MFI.hasVarSizedObjects())
495     AFI->setShouldRestoreSPFromFP(true);
496 
497   // In some cases, virtual registers have been introduced, e.g. by uses of
498   // emitThumbRegPlusImmInReg.
499   MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
500 }
501 
502 void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
503                                    MachineBasicBlock &MBB) const {
504   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
505   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
506   MachineFrameInfo &MFI = MF.getFrameInfo();
507   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
508   const ThumbRegisterInfo *RegInfo =
509       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
510   const Thumb1InstrInfo &TII =
511       *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
512 
513   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
514   int NumBytes = (int)MFI.getStackSize();
515   assert((unsigned)NumBytes >= ArgRegsSaveSize &&
516          "ArgRegsSaveSize is included in NumBytes");
517   Register FramePtr = RegInfo->getFrameRegister(MF);
518 
519   if (!AFI->hasStackFrame()) {
520     if (NumBytes - ArgRegsSaveSize != 0)
521       emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
522                                    NumBytes - ArgRegsSaveSize, ARM::NoRegister,
523                                    MachineInstr::FrameDestroy);
524   } else {
525     // Unwind MBBI to point to first LDR / VLDRD.
526     if (MBBI != MBB.begin()) {
527       do
528         --MBBI;
529       while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy));
530       if (!MBBI->getFlag(MachineInstr::FrameDestroy))
531         ++MBBI;
532     }
533 
534     // Move SP to start of FP callee save spill area.
535     NumBytes -= (AFI->getFrameRecordSavedAreaSize() +
536                  AFI->getGPRCalleeSavedArea1Size() +
537                  AFI->getGPRCalleeSavedArea2Size() +
538                  AFI->getDPRCalleeSavedAreaSize() +
539                  ArgRegsSaveSize);
540 
541     if (AFI->shouldRestoreSPFromFP()) {
542       NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
543       // Reset SP based on frame pointer only if the stack frame extends beyond
544       // frame pointer stack slot, the target is ELF and the function has FP, or
545       // the target uses var sized objects.
546       if (NumBytes) {
547         assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
548                "No scratch register to restore SP from FP!");
549         emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
550                                   TII, *RegInfo, MachineInstr::FrameDestroy);
551         BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
552             .addReg(ARM::R4)
553             .add(predOps(ARMCC::AL))
554             .setMIFlag(MachineInstr::FrameDestroy);
555       } else
556         BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
557             .addReg(FramePtr)
558             .add(predOps(ARMCC::AL))
559             .setMIFlag(MachineInstr::FrameDestroy);
560     } else {
561       // For a large stack frame, we might need a scratch register to store
562       // the size of the frame.  We know all callee-save registers are free
563       // at this point in the epilogue, so pick one.
564       unsigned ScratchRegister = ARM::NoRegister;
565       bool HasFP = hasFP(MF);
566       for (auto &I : MFI.getCalleeSavedInfo()) {
567         Register Reg = I.getReg();
568         if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
569           ScratchRegister = Reg;
570           break;
571         }
572       }
573       if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
574           &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
575         MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
576         if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
577           emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
578                                        ScratchRegister, MachineInstr::FrameDestroy);
579       } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
580         emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
581                                      ScratchRegister, MachineInstr::FrameDestroy);
582     }
583   }
584 
585   if (needPopSpecialFixUp(MF)) {
586     bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
587     (void)Done;
588     assert(Done && "Emission of the special fixup failed!?");
589   }
590 }
591 
592 bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
593   if (!needPopSpecialFixUp(*MBB.getParent()))
594     return true;
595 
596   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
597   return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false);
598 }
599 
600 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
601   ARMFunctionInfo *AFI =
602       const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>();
603   if (AFI->getArgRegsSaveSize())
604     return true;
605 
606   // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
607   for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
608     if (CSI.getReg() == ARM::LR)
609       return true;
610 
611   return false;
612 }
613 
614 static void findTemporariesForLR(const BitVector &GPRsNoLRSP,
615                                  const BitVector &PopFriendly,
616                                  const LivePhysRegs &UsedRegs, unsigned &PopReg,
617                                  unsigned &TmpReg, MachineRegisterInfo &MRI) {
618   PopReg = TmpReg = 0;
619   for (auto Reg : GPRsNoLRSP.set_bits()) {
620     if (UsedRegs.available(MRI, Reg)) {
621       // Remember the first pop-friendly register and exit.
622       if (PopFriendly.test(Reg)) {
623         PopReg = Reg;
624         TmpReg = 0;
625         break;
626       }
627       // Otherwise, remember that the register will be available to
628       // save a pop-friendly register.
629       TmpReg = Reg;
630     }
631   }
632 }
633 
634 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
635                                               bool DoIt) const {
636   MachineFunction &MF = *MBB.getParent();
637   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
638   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
639   const TargetInstrInfo &TII = *STI.getInstrInfo();
640   const ThumbRegisterInfo *RegInfo =
641       static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
642 
643   // If MBBI is a return instruction, or is a tPOP followed by a return
644   // instruction in the successor BB, we may be able to directly restore
645   // LR in the PC.
646   // This is only possible with v5T ops (v4T can't change the Thumb bit via
647   // a POP PC instruction), and only if we do not need to emit any SP update.
648   // Otherwise, we need a temporary register to pop the value
649   // and copy that value into LR.
650   auto MBBI = MBB.getFirstTerminator();
651   bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
652   if (CanRestoreDirectly) {
653     if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
654       CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
655                             MBBI->getOpcode() == ARM::tPOP_RET);
656     else {
657       auto MBBI_prev = MBBI;
658       MBBI_prev--;
659       assert(MBBI_prev->getOpcode() == ARM::tPOP);
660       assert(MBB.succ_size() == 1);
661       if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
662         MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
663       else
664         CanRestoreDirectly = false;
665     }
666   }
667 
668   if (CanRestoreDirectly) {
669     if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
670       return true;
671     MachineInstrBuilder MIB =
672         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
673             .add(predOps(ARMCC::AL))
674             .setMIFlag(MachineInstr::FrameDestroy);
675     // Copy implicit ops and popped registers, if any.
676     for (auto MO: MBBI->operands())
677       if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
678         MIB.add(MO);
679     MIB.addReg(ARM::PC, RegState::Define);
680     // Erase the old instruction (tBX_RET or tPOP).
681     MBB.erase(MBBI);
682     return true;
683   }
684 
685   // Look for a temporary register to use.
686   // First, compute the liveness information.
687   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
688   LivePhysRegs UsedRegs(TRI);
689   UsedRegs.addLiveOuts(MBB);
690   // The semantic of pristines changed recently and now,
691   // the callee-saved registers that are touched in the function
692   // are not part of the pristines set anymore.
693   // Add those callee-saved now.
694   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
695   for (unsigned i = 0; CSRegs[i]; ++i)
696     UsedRegs.addReg(CSRegs[i]);
697 
698   DebugLoc dl = DebugLoc();
699   if (MBBI != MBB.end()) {
700     dl = MBBI->getDebugLoc();
701     auto InstUpToMBBI = MBB.end();
702     while (InstUpToMBBI != MBBI)
703       // The pre-decrement is on purpose here.
704       // We want to have the liveness right before MBBI.
705       UsedRegs.stepBackward(*--InstUpToMBBI);
706   }
707 
708   // Look for a register that can be directly used in the POP.
709   unsigned PopReg = 0;
710   // And some temporary register, just in case.
711   unsigned TemporaryReg = 0;
712   BitVector PopFriendly =
713       TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID));
714   // R7 may be used as a frame pointer, hence marked as not generally
715   // allocatable, however there's no reason to not use it as a temporary for
716   // restoring LR.
717   if (STI.getFramePointerReg() == ARM::R7)
718     PopFriendly.set(ARM::R7);
719 
720   assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
721   // Rebuild the GPRs from the high registers because they are removed
722   // from the GPR reg class for thumb1.
723   BitVector GPRsNoLRSP =
724       TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID));
725   GPRsNoLRSP |= PopFriendly;
726   GPRsNoLRSP.reset(ARM::LR);
727   GPRsNoLRSP.reset(ARM::SP);
728   GPRsNoLRSP.reset(ARM::PC);
729   findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg,
730                        MF.getRegInfo());
731 
732   // If we couldn't find a pop-friendly register, try restoring LR before
733   // popping the other callee-saved registers, so we could use one of them as a
734   // temporary.
735   bool UseLDRSP = false;
736   if (!PopReg && MBBI != MBB.begin()) {
737     auto PrevMBBI = MBBI;
738     PrevMBBI--;
739     if (PrevMBBI->getOpcode() == ARM::tPOP) {
740       UsedRegs.stepBackward(*PrevMBBI);
741       findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg,
742                            TemporaryReg, MF.getRegInfo());
743       if (PopReg) {
744         MBBI = PrevMBBI;
745         UseLDRSP = true;
746       }
747     }
748   }
749 
750   if (!DoIt && !PopReg && !TemporaryReg)
751     return false;
752 
753   assert((PopReg || TemporaryReg) && "Cannot get LR");
754 
755   if (UseLDRSP) {
756     assert(PopReg && "Do not know how to get LR");
757     // Load the LR via LDR tmp, [SP, #off]
758     BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi))
759       .addReg(PopReg, RegState::Define)
760       .addReg(ARM::SP)
761       .addImm(MBBI->getNumExplicitOperands() - 2)
762       .add(predOps(ARMCC::AL))
763       .setMIFlag(MachineInstr::FrameDestroy);
764     // Move from the temporary register to the LR.
765     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
766       .addReg(ARM::LR, RegState::Define)
767       .addReg(PopReg, RegState::Kill)
768       .add(predOps(ARMCC::AL))
769       .setMIFlag(MachineInstr::FrameDestroy);
770     // Advance past the pop instruction.
771     MBBI++;
772     // Increment the SP.
773     emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
774                                  ArgRegsSaveSize + 4, ARM::NoRegister,
775                                  MachineInstr::FrameDestroy);
776     return true;
777   }
778 
779   if (TemporaryReg) {
780     assert(!PopReg && "Unnecessary MOV is about to be inserted");
781     PopReg = PopFriendly.find_first();
782     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
783         .addReg(TemporaryReg, RegState::Define)
784         .addReg(PopReg, RegState::Kill)
785         .add(predOps(ARMCC::AL))
786         .setMIFlag(MachineInstr::FrameDestroy);
787   }
788 
789   if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
790     // We couldn't use the direct restoration above, so
791     // perform the opposite conversion: tPOP_RET to tPOP.
792     MachineInstrBuilder MIB =
793         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
794             .add(predOps(ARMCC::AL))
795             .setMIFlag(MachineInstr::FrameDestroy);
796     bool Popped = false;
797     for (auto MO: MBBI->operands())
798       if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
799           MO.getReg() != ARM::PC) {
800         MIB.add(MO);
801         if (!MO.isImplicit())
802           Popped = true;
803       }
804     // Is there anything left to pop?
805     if (!Popped)
806       MBB.erase(MIB.getInstr());
807     // Erase the old instruction.
808     MBB.erase(MBBI);
809     MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
810                .add(predOps(ARMCC::AL))
811                .setMIFlag(MachineInstr::FrameDestroy);
812   }
813 
814   assert(PopReg && "Do not know how to get LR");
815   BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
816       .add(predOps(ARMCC::AL))
817       .addReg(PopReg, RegState::Define)
818       .setMIFlag(MachineInstr::FrameDestroy);
819 
820   emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize,
821                                ARM::NoRegister, MachineInstr::FrameDestroy);
822 
823   BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
824       .addReg(ARM::LR, RegState::Define)
825       .addReg(PopReg, RegState::Kill)
826       .add(predOps(ARMCC::AL))
827       .setMIFlag(MachineInstr::FrameDestroy);
828 
829   if (TemporaryReg)
830     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
831         .addReg(PopReg, RegState::Define)
832         .addReg(TemporaryReg, RegState::Kill)
833         .add(predOps(ARMCC::AL))
834         .setMIFlag(MachineInstr::FrameDestroy);
835 
836   return true;
837 }
838 
839 static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6,
840                                                      ARM::R7, ARM::LR};
841 static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9,
842                                                       ARM::R10, ARM::R11};
843 static const SmallVector<Register> OrderedCopyRegs = {
844     ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
845     ARM::R5, ARM::R6, ARM::R7, ARM::LR};
846 
847 static void splitLowAndHighRegs(const std::set<Register> &Regs,
848                                 std::set<Register> &LowRegs,
849                                 std::set<Register> &HighRegs) {
850   for (Register Reg : Regs) {
851     if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
852       LowRegs.insert(Reg);
853     } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
854       HighRegs.insert(Reg);
855     } else {
856       llvm_unreachable("callee-saved register of unexpected class");
857     }
858   }
859 }
860 
861 template <typename It>
862 It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt,
863                      const std::set<Register> &RegSet) {
864   return std::find_if(OrderedStartIt, OrderedEndIt,
865                       [&](Register Reg) { return RegSet.count(Reg); });
866 }
867 
868 static void pushRegsToStack(MachineBasicBlock &MBB,
869                             MachineBasicBlock::iterator MI,
870                             const TargetInstrInfo &TII,
871                             const std::set<Register> &RegsToSave,
872                             const std::set<Register> &CopyRegs) {
873   MachineFunction &MF = *MBB.getParent();
874   const MachineRegisterInfo &MRI = MF.getRegInfo();
875   DebugLoc DL;
876 
877   std::set<Register> LowRegs, HighRegs;
878   splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs);
879 
880   // Push low regs first
881   if (!LowRegs.empty()) {
882     MachineInstrBuilder MIB =
883         BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
884     for (unsigned Reg : OrderedLowRegs) {
885       if (LowRegs.count(Reg)) {
886         bool isKill = !MRI.isLiveIn(Reg);
887         if (isKill && !MRI.isReserved(Reg))
888           MBB.addLiveIn(Reg);
889 
890         MIB.addReg(Reg, getKillRegState(isKill));
891       }
892     }
893     MIB.setMIFlags(MachineInstr::FrameSetup);
894   }
895 
896   // Now push the high registers
897   // There are no store instructions that can access high registers directly,
898   // so we have to move them to low registers, and push them.
899   // This might take multiple pushes, as it is possible for there to
900   // be fewer low registers available than high registers which need saving.
901 
902   // Find the first register to save.
903   // Registers must be processed in reverse order so that in case we need to use
904   // multiple PUSH instructions, the order of the registers on the stack still
905   // matches the unwind info. They need to be swicthed back to ascending order
906   // before adding to the PUSH instruction.
907   auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(),
908                                        OrderedHighRegs.rend(),
909                                        HighRegs);
910 
911   while (HiRegToSave != OrderedHighRegs.rend()) {
912     // Find the first low register to use.
913     auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(),
914                                        OrderedCopyRegs.rend(),
915                                        CopyRegs);
916 
917     // Create the PUSH, but don't insert it yet (the MOVs need to come first).
918     MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
919                                       .add(predOps(ARMCC::AL))
920                                       .setMIFlags(MachineInstr::FrameSetup);
921 
922     SmallVector<unsigned, 4> RegsToPush;
923     while (HiRegToSave != OrderedHighRegs.rend() &&
924            CopyRegIt != OrderedCopyRegs.rend()) {
925       if (HighRegs.count(*HiRegToSave)) {
926         bool isKill = !MRI.isLiveIn(*HiRegToSave);
927         if (isKill && !MRI.isReserved(*HiRegToSave))
928           MBB.addLiveIn(*HiRegToSave);
929 
930         // Emit a MOV from the high reg to the low reg.
931         BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
932             .addReg(*CopyRegIt, RegState::Define)
933             .addReg(*HiRegToSave, getKillRegState(isKill))
934             .add(predOps(ARMCC::AL))
935             .setMIFlags(MachineInstr::FrameSetup);
936 
937         // Record the register that must be added to the PUSH.
938         RegsToPush.push_back(*CopyRegIt);
939 
940         CopyRegIt = getNextOrderedReg(std::next(CopyRegIt),
941                                       OrderedCopyRegs.rend(),
942                                       CopyRegs);
943         HiRegToSave = getNextOrderedReg(std::next(HiRegToSave),
944                                         OrderedHighRegs.rend(),
945                                         HighRegs);
946       }
947     }
948 
949     // Add the low registers to the PUSH, in ascending order.
950     for (unsigned Reg : llvm::reverse(RegsToPush))
951       PushMIB.addReg(Reg, RegState::Kill);
952 
953     // Insert the PUSH instruction after the MOVs.
954     MBB.insert(MI, PushMIB);
955   }
956 }
957 
958 static void popRegsFromStack(MachineBasicBlock &MBB,
959                              MachineBasicBlock::iterator &MI,
960                              const TargetInstrInfo &TII,
961                              const std::set<Register> &RegsToRestore,
962                              const std::set<Register> &AvailableCopyRegs,
963                              bool IsVarArg, bool HasV5Ops) {
964   if (RegsToRestore.empty())
965     return;
966 
967   MachineFunction &MF = *MBB.getParent();
968   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
969   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
970 
971   std::set<Register> LowRegs, HighRegs;
972   splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs);
973 
974   // Pop the high registers first
975   // There are no store instructions that can access high registers directly,
976   // so we have to pop into low registers and them move to  the high registers.
977   // This might take multiple pops, as it is possible for there to
978   // be fewer low registers available than high registers which need restoring.
979 
980   // Find the first register to restore.
981   auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(),
982                                           OrderedHighRegs.end(),
983                                           HighRegs);
984 
985   std::set<Register> CopyRegs = AvailableCopyRegs;
986   Register LowScratchReg;
987   if (!HighRegs.empty() && CopyRegs.empty()) {
988     // No copy regs are available to pop high regs. Let's make use of a return
989     // register and the scratch register (IP/R12) to copy things around.
990     LowScratchReg = ARM::R0;
991     BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
992         .addReg(ARM::R12, RegState::Define)
993         .addReg(LowScratchReg, RegState::Kill)
994         .add(predOps(ARMCC::AL))
995         .setMIFlag(MachineInstr::FrameDestroy);
996     CopyRegs.insert(LowScratchReg);
997   }
998 
999   while (HiRegToRestore != OrderedHighRegs.end()) {
1000     assert(!CopyRegs.empty());
1001     // Find the first low register to use.
1002     auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(),
1003                                      OrderedCopyRegs.end(),
1004                                      CopyRegs);
1005 
1006     // Create the POP instruction.
1007     MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP))
1008                                      .add(predOps(ARMCC::AL))
1009                                      .setMIFlag(MachineInstr::FrameDestroy);
1010 
1011     while (HiRegToRestore != OrderedHighRegs.end() &&
1012            CopyReg != OrderedCopyRegs.end()) {
1013       // Add the low register to the POP.
1014       PopMIB.addReg(*CopyReg, RegState::Define);
1015 
1016       // Create the MOV from low to high register.
1017       BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
1018           .addReg(*HiRegToRestore, RegState::Define)
1019           .addReg(*CopyReg, RegState::Kill)
1020           .add(predOps(ARMCC::AL))
1021           .setMIFlag(MachineInstr::FrameDestroy);
1022 
1023       CopyReg = getNextOrderedReg(std::next(CopyReg),
1024                                   OrderedCopyRegs.end(),
1025                                   CopyRegs);
1026       HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore),
1027                                          OrderedHighRegs.end(),
1028                                          HighRegs);
1029     }
1030   }
1031 
1032   // Restore low register used as scratch if necessary
1033   if (LowScratchReg.isValid()) {
1034     BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
1035         .addReg(LowScratchReg, RegState::Define)
1036         .addReg(ARM::R12, RegState::Kill)
1037         .add(predOps(ARMCC::AL))
1038         .setMIFlag(MachineInstr::FrameDestroy);
1039   }
1040 
1041   // Now pop the low registers
1042   if (!LowRegs.empty()) {
1043     MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP))
1044                                   .add(predOps(ARMCC::AL))
1045                                   .setMIFlag(MachineInstr::FrameDestroy);
1046 
1047     bool NeedsPop = false;
1048     for (Register Reg : OrderedLowRegs) {
1049       if (!LowRegs.count(Reg))
1050         continue;
1051 
1052       if (Reg == ARM::LR) {
1053         if (!MBB.succ_empty() ||
1054             MI->getOpcode() == ARM::TCRETURNdi ||
1055             MI->getOpcode() == ARM::TCRETURNri)
1056           // LR may only be popped into PC, as part of return sequence.
1057           // If this isn't the return sequence, we'll need emitPopSpecialFixUp
1058           // to restore LR the hard way.
1059           // FIXME: if we don't pass any stack arguments it would be actually
1060           // advantageous *and* correct to do the conversion to an ordinary call
1061           // instruction here.
1062           continue;
1063         // Special epilogue for vararg functions. See emitEpilogue
1064         if (IsVarArg)
1065           continue;
1066         // ARMv4T requires BX, see emitEpilogue
1067         if (!HasV5Ops)
1068           continue;
1069 
1070         // CMSE entry functions must return via BXNS, see emitEpilogue.
1071         if (AFI->isCmseNSEntryFunction())
1072           continue;
1073 
1074         // Pop LR into PC.
1075         Reg = ARM::PC;
1076         (*MIB).setDesc(TII.get(ARM::tPOP_RET));
1077         if (MI != MBB.end())
1078           MIB.copyImplicitOps(*MI);
1079         MI = MBB.erase(MI);
1080       }
1081       MIB.addReg(Reg, getDefRegState(true));
1082       NeedsPop = true;
1083     }
1084 
1085     // It's illegal to emit pop instruction without operands.
1086     if (NeedsPop)
1087       MBB.insert(MI, &*MIB);
1088     else
1089       MF.deleteMachineInstr(MIB);
1090   }
1091 }
1092 
1093 bool Thumb1FrameLowering::spillCalleeSavedRegisters(
1094     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1095     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
1096   if (CSI.empty())
1097     return false;
1098 
1099   const TargetInstrInfo &TII = *STI.getInstrInfo();
1100   MachineFunction &MF = *MBB.getParent();
1101   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1102       MF.getSubtarget().getRegisterInfo());
1103   Register FPReg = RegInfo->getFrameRegister(MF);
1104 
1105   // In case FP is a high reg, we need a separate push sequence to generate
1106   // a correct Frame Record
1107   bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
1108 
1109   std::set<Register> FrameRecord;
1110   std::set<Register> SpilledGPRs;
1111   for (const CalleeSavedInfo &I : CSI) {
1112     Register Reg = I.getReg();
1113     if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR))
1114       FrameRecord.insert(Reg);
1115     else
1116       SpilledGPRs.insert(Reg);
1117   }
1118 
1119   pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
1120 
1121   // Determine intermediate registers which can be used for pushing high regs:
1122   // - Spilled low regs
1123   // - Unused argument registers
1124   std::set<Register> CopyRegs;
1125   for (Register Reg : SpilledGPRs)
1126     if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
1127         !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg))
1128       CopyRegs.insert(Reg);
1129   for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
1130     if (!MF.getRegInfo().isLiveIn(ArgReg))
1131       CopyRegs.insert(ArgReg);
1132 
1133   pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
1134 
1135   return true;
1136 }
1137 
1138 bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
1139     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1140     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
1141   if (CSI.empty())
1142     return false;
1143 
1144   MachineFunction &MF = *MBB.getParent();
1145   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1146   const TargetInstrInfo &TII = *STI.getInstrInfo();
1147   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1148       MF.getSubtarget().getRegisterInfo());
1149   bool IsVarArg = AFI->getArgRegsSaveSize() > 0;
1150   Register FPReg = RegInfo->getFrameRegister(MF);
1151 
1152   // In case FP is a high reg, we need a separate pop sequence to generate
1153   // a correct Frame Record
1154   bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
1155 
1156   std::set<Register> FrameRecord;
1157   std::set<Register> SpilledGPRs;
1158   for (CalleeSavedInfo &I : CSI) {
1159     Register Reg = I.getReg();
1160     if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR))
1161       FrameRecord.insert(Reg);
1162     else
1163       SpilledGPRs.insert(Reg);
1164 
1165     if (Reg == ARM::LR)
1166       I.setRestored(false);
1167   }
1168 
1169   // Determine intermidiate registers which can be used for popping high regs:
1170   // - Spilled low regs
1171   // - Unused return registers
1172   std::set<Register> CopyRegs;
1173   std::set<Register> UnusedReturnRegs;
1174   for (Register Reg : SpilledGPRs)
1175     if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg))
1176       CopyRegs.insert(Reg);
1177   auto Terminator = MBB.getFirstTerminator();
1178   if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
1179     UnusedReturnRegs.insert(ARM::R0);
1180     UnusedReturnRegs.insert(ARM::R1);
1181     UnusedReturnRegs.insert(ARM::R2);
1182     UnusedReturnRegs.insert(ARM::R3);
1183     for (auto Op : Terminator->implicit_operands()) {
1184       if (Op.isReg())
1185         UnusedReturnRegs.erase(Op.getReg());
1186     }
1187   }
1188   CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end());
1189 
1190   // First pop regular spilled regs.
1191   popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg,
1192                    STI.hasV5TOps());
1193 
1194   // LR may only be popped into pc, as part of a return sequence.
1195   // Check that no other pop instructions are inserted after that.
1196   assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) &&
1197          "Can't insert pop after return sequence");
1198 
1199   // Now pop Frame Record regs.
1200   // Only unused return registers can be used as copy regs at this point.
1201   popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg,
1202                    STI.hasV5TOps());
1203 
1204   return true;
1205 }
1206