1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
// Name under which this file's statistics and debug output are registered.
#define DEBUG_TYPE "framelowering"
// Counters for prologue spills to, and epilogue reloads from, vector
// registers.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");

// Hidden command-line switch "ppc-enable-pe-vector-spills"; it defaults to
// false, so prologue spills to vector registers are opt-in.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
computeReturnSaveOffset(const PPCSubtarget & STI)49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isDarwinABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
computeTOCSaveOffset(const PPCSubtarget & STI)56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   return STI.isELFv2ABI() ? 24 : 40;
58 }
59 
computeFramePointerSaveOffset(const PPCSubtarget & STI)60 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
61   // For the Darwin ABI:
62   // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
63   // for saving the frame pointer (if needed.)  While the published ABI has
64   // not used this slot since at least MacOSX 10.2, there is older code
65   // around that does use it, and that needs to continue to work.
66   if (STI.isDarwinABI())
67     return STI.isPPC64() ? -8U : -4U;
68 
69   // SVR4 ABI: First slot in the general register save area.
70   return STI.isPPC64() ? -8U : -4U;
71 }
72 
computeLinkageSize(const PPCSubtarget & STI)73 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
74   if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
75     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
76 
77   // 32-bit SVR4 ABI:
78   return 8;
79 }
80 
computeBasePointerSaveOffset(const PPCSubtarget & STI)81 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
82   if (STI.isDarwinABI())
83     return STI.isPPC64() ? -16U : -8U;
84 
85   // SVR4 ABI: First slot in the general register save area.
86   return STI.isPPC64()
87              ? -16U
88              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
89 }
90 
PPCFrameLowering(const PPCSubtarget & STI)91 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
92     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
93                           STI.getPlatformStackAlignment(), 0),
94       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
95       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
96       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
97       LinkageSize(computeLinkageSize(Subtarget)),
98       BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
99 
100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
getCalleeSavedSpillSlots(unsigned & NumEntries) const101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
102     unsigned &NumEntries) const {
103   if (Subtarget.isDarwinABI()) {
104     NumEntries = 1;
105     if (Subtarget.isPPC64()) {
106       static const SpillSlot darwin64Offsets = {PPC::X31, -8};
107       return &darwin64Offsets;
108     } else {
109       static const SpillSlot darwinOffsets = {PPC::R31, -4};
110       return &darwinOffsets;
111     }
112   }
113 
114   // Early exit if not using the SVR4 ABI.
115   if (!Subtarget.isSVR4ABI()) {
116     NumEntries = 0;
117     return nullptr;
118   }
119 
120   // Note that the offsets here overlap, but this is fixed up in
121   // processFunctionBeforeFrameFinalized.
122 
123   static const SpillSlot Offsets[] = {
124       // Floating-point register save area offsets.
125       {PPC::F31, -8},
126       {PPC::F30, -16},
127       {PPC::F29, -24},
128       {PPC::F28, -32},
129       {PPC::F27, -40},
130       {PPC::F26, -48},
131       {PPC::F25, -56},
132       {PPC::F24, -64},
133       {PPC::F23, -72},
134       {PPC::F22, -80},
135       {PPC::F21, -88},
136       {PPC::F20, -96},
137       {PPC::F19, -104},
138       {PPC::F18, -112},
139       {PPC::F17, -120},
140       {PPC::F16, -128},
141       {PPC::F15, -136},
142       {PPC::F14, -144},
143 
144       // General register save area offsets.
145       {PPC::R31, -4},
146       {PPC::R30, -8},
147       {PPC::R29, -12},
148       {PPC::R28, -16},
149       {PPC::R27, -20},
150       {PPC::R26, -24},
151       {PPC::R25, -28},
152       {PPC::R24, -32},
153       {PPC::R23, -36},
154       {PPC::R22, -40},
155       {PPC::R21, -44},
156       {PPC::R20, -48},
157       {PPC::R19, -52},
158       {PPC::R18, -56},
159       {PPC::R17, -60},
160       {PPC::R16, -64},
161       {PPC::R15, -68},
162       {PPC::R14, -72},
163 
164       // CR save area offset.  We map each of the nonvolatile CR fields
165       // to the slot for CR2, which is the first of the nonvolatile CR
166       // fields to be assigned, so that we only allocate one save slot.
167       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
168       {PPC::CR2, -4},
169 
170       // VRSAVE save area offset.
171       {PPC::VRSAVE, -4},
172 
173       // Vector register save area
174       {PPC::V31, -16},
175       {PPC::V30, -32},
176       {PPC::V29, -48},
177       {PPC::V28, -64},
178       {PPC::V27, -80},
179       {PPC::V26, -96},
180       {PPC::V25, -112},
181       {PPC::V24, -128},
182       {PPC::V23, -144},
183       {PPC::V22, -160},
184       {PPC::V21, -176},
185       {PPC::V20, -192},
186 
187       // SPE register save area (overlaps Vector save area).
188       {PPC::S31, -8},
189       {PPC::S30, -16},
190       {PPC::S29, -24},
191       {PPC::S28, -32},
192       {PPC::S27, -40},
193       {PPC::S26, -48},
194       {PPC::S25, -56},
195       {PPC::S24, -64},
196       {PPC::S23, -72},
197       {PPC::S22, -80},
198       {PPC::S21, -88},
199       {PPC::S20, -96},
200       {PPC::S19, -104},
201       {PPC::S18, -112},
202       {PPC::S17, -120},
203       {PPC::S16, -128},
204       {PPC::S15, -136},
205       {PPC::S14, -144}};
206 
207   static const SpillSlot Offsets64[] = {
208       // Floating-point register save area offsets.
209       {PPC::F31, -8},
210       {PPC::F30, -16},
211       {PPC::F29, -24},
212       {PPC::F28, -32},
213       {PPC::F27, -40},
214       {PPC::F26, -48},
215       {PPC::F25, -56},
216       {PPC::F24, -64},
217       {PPC::F23, -72},
218       {PPC::F22, -80},
219       {PPC::F21, -88},
220       {PPC::F20, -96},
221       {PPC::F19, -104},
222       {PPC::F18, -112},
223       {PPC::F17, -120},
224       {PPC::F16, -128},
225       {PPC::F15, -136},
226       {PPC::F14, -144},
227 
228       // General register save area offsets.
229       {PPC::X31, -8},
230       {PPC::X30, -16},
231       {PPC::X29, -24},
232       {PPC::X28, -32},
233       {PPC::X27, -40},
234       {PPC::X26, -48},
235       {PPC::X25, -56},
236       {PPC::X24, -64},
237       {PPC::X23, -72},
238       {PPC::X22, -80},
239       {PPC::X21, -88},
240       {PPC::X20, -96},
241       {PPC::X19, -104},
242       {PPC::X18, -112},
243       {PPC::X17, -120},
244       {PPC::X16, -128},
245       {PPC::X15, -136},
246       {PPC::X14, -144},
247 
248       // VRSAVE save area offset.
249       {PPC::VRSAVE, -4},
250 
251       // Vector register save area
252       {PPC::V31, -16},
253       {PPC::V30, -32},
254       {PPC::V29, -48},
255       {PPC::V28, -64},
256       {PPC::V27, -80},
257       {PPC::V26, -96},
258       {PPC::V25, -112},
259       {PPC::V24, -128},
260       {PPC::V23, -144},
261       {PPC::V22, -160},
262       {PPC::V21, -176},
263       {PPC::V20, -192}};
264 
265   if (Subtarget.isPPC64()) {
266     NumEntries = array_lengthof(Offsets64);
267 
268     return Offsets64;
269   } else {
270     NumEntries = array_lengthof(Offsets);
271 
272     return Offsets;
273   }
274 }
275 
276 /// RemoveVRSaveCode - We have found that this function does not need any code
277 /// to manipulate the VRSAVE register, even though it uses vector registers.
278 /// This can happen when the only registers used are known to be live in or out
279 /// of the function.  Remove all of the VRSAVE related code from the function.
280 /// FIXME: The removal of the code results in a compile failure at -O0 when the
281 /// function contains a function call, as the GPR containing original VRSAVE
282 /// contents is spilled and reloaded around the call.  Without the prolog code,
283 /// the spill instruction refers to an undefined register.  This code needs
284 /// to account for all uses of that GPR.
RemoveVRSaveCode(MachineInstr & MI)285 static void RemoveVRSaveCode(MachineInstr &MI) {
286   MachineBasicBlock *Entry = MI.getParent();
287   MachineFunction *MF = Entry->getParent();
288 
289   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
290   MachineBasicBlock::iterator MBBI = MI;
291   ++MBBI;
292   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
293   MBBI->eraseFromParent();
294 
295   bool RemovedAllMTVRSAVEs = true;
296   // See if we can find and remove the MTVRSAVE instruction from all of the
297   // epilog blocks.
298   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
299     // If last instruction is a return instruction, add an epilogue
300     if (I->isReturnBlock()) {
301       bool FoundIt = false;
302       for (MBBI = I->end(); MBBI != I->begin(); ) {
303         --MBBI;
304         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
305           MBBI->eraseFromParent();  // remove it.
306           FoundIt = true;
307           break;
308         }
309       }
310       RemovedAllMTVRSAVEs &= FoundIt;
311     }
312   }
313 
314   // If we found and removed all MTVRSAVE instructions, remove the read of
315   // VRSAVE as well.
316   if (RemovedAllMTVRSAVEs) {
317     MBBI = MI;
318     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
319     --MBBI;
320     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
321     MBBI->eraseFromParent();
322   }
323 
324   // Finally, nuke the UPDATE_VRSAVE.
325   MI.eraseFromParent();
326 }
327 
328 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
329 // instruction selector.  Based on the vector registers that have been used,
330 // transform this into the appropriate ORI instruction.
HandleVRSaveUpdate(MachineInstr & MI,const TargetInstrInfo & TII)331 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
332   MachineFunction *MF = MI.getParent()->getParent();
333   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
334   DebugLoc dl = MI.getDebugLoc();
335 
336   const MachineRegisterInfo &MRI = MF->getRegInfo();
337   unsigned UsedRegMask = 0;
338   for (unsigned i = 0; i != 32; ++i)
339     if (MRI.isPhysRegModified(VRRegNo[i]))
340       UsedRegMask |= 1 << (31-i);
341 
342   // Live in and live out values already must be in the mask, so don't bother
343   // marking them.
344   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
345     unsigned RegNo = TRI->getEncodingValue(LI.first);
346     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
347       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
348   }
349 
350   // Live out registers appear as use operands on return instructions.
351   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
352        UsedRegMask != 0 && BI != BE; ++BI) {
353     const MachineBasicBlock &MBB = *BI;
354     if (!MBB.isReturnBlock())
355       continue;
356     const MachineInstr &Ret = MBB.back();
357     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
358       const MachineOperand &MO = Ret.getOperand(I);
359       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
360         continue;
361       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
362       UsedRegMask &= ~(1 << (31-RegNo));
363     }
364   }
365 
366   // If no registers are used, turn this into a copy.
367   if (UsedRegMask == 0) {
368     // Remove all VRSAVE code.
369     RemoveVRSaveCode(MI);
370     return;
371   }
372 
373   unsigned SrcReg = MI.getOperand(1).getReg();
374   unsigned DstReg = MI.getOperand(0).getReg();
375 
376   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
377     if (DstReg != SrcReg)
378       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
379           .addReg(SrcReg)
380           .addImm(UsedRegMask);
381     else
382       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
383           .addReg(SrcReg, RegState::Kill)
384           .addImm(UsedRegMask);
385   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
386     if (DstReg != SrcReg)
387       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
388           .addReg(SrcReg)
389           .addImm(UsedRegMask >> 16);
390     else
391       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
392           .addReg(SrcReg, RegState::Kill)
393           .addImm(UsedRegMask >> 16);
394   } else {
395     if (DstReg != SrcReg)
396       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
397           .addReg(SrcReg)
398           .addImm(UsedRegMask >> 16);
399     else
400       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
401           .addReg(SrcReg, RegState::Kill)
402           .addImm(UsedRegMask >> 16);
403 
404     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
405         .addReg(DstReg, RegState::Kill)
406         .addImm(UsedRegMask & 0xFFFF);
407   }
408 
409   // Remove the old UPDATE_VRSAVE instruction.
410   MI.eraseFromParent();
411 }
412 
spillsCR(const MachineFunction & MF)413 static bool spillsCR(const MachineFunction &MF) {
414   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
415   return FuncInfo->isCRSpilled();
416 }
417 
spillsVRSAVE(const MachineFunction & MF)418 static bool spillsVRSAVE(const MachineFunction &MF) {
419   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
420   return FuncInfo->isVRSAVESpilled();
421 }
422 
hasSpills(const MachineFunction & MF)423 static bool hasSpills(const MachineFunction &MF) {
424   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
425   return FuncInfo->hasSpills();
426 }
427 
hasNonRISpills(const MachineFunction & MF)428 static bool hasNonRISpills(const MachineFunction &MF) {
429   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
430   return FuncInfo->hasNonRISpills();
431 }
432 
433 /// MustSaveLR - Return true if this function requires that we save the LR
434 /// register onto the stack in the prolog and restore it in the epilog of the
435 /// function.
MustSaveLR(const MachineFunction & MF,unsigned LR)436 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
437   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
438 
439   // We need a save/restore of LR if there is any def of LR (which is
440   // defined by calls, including the PIC setup sequence), or if there is
441   // some use of the LR stack slot (e.g. for builtin_return_address).
442   // (LR comes in 32 and 64 bit versions.)
443   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
444   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
445 }
446 
447 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
448 /// call frame size. Update the MachineFunction object with the stack size.
449 unsigned
determineFrameLayoutAndUpdate(MachineFunction & MF,bool UseEstimate) const450 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
451                                                 bool UseEstimate) const {
452   unsigned NewMaxCallFrameSize = 0;
453   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
454                                             &NewMaxCallFrameSize);
455   MF.getFrameInfo().setStackSize(FrameSize);
456   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
457   return FrameSize;
458 }
459 
460 /// determineFrameLayout - Determine the size of the frame and maximum call
461 /// frame size.
462 unsigned
determineFrameLayout(const MachineFunction & MF,bool UseEstimate,unsigned * NewMaxCallFrameSize) const463 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
464                                        bool UseEstimate,
465                                        unsigned *NewMaxCallFrameSize) const {
466   const MachineFrameInfo &MFI = MF.getFrameInfo();
467   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
468 
469   // Get the number of bytes to allocate from the FrameInfo
470   unsigned FrameSize =
471     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
472 
473   // Get stack alignments. The frame must be aligned to the greatest of these:
474   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
475   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
476   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
477 
478   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
479 
480   unsigned LR = RegInfo->getRARegister();
481   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
482   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
483                        !MFI.adjustsStack() &&       // No calls.
484                        !MustSaveLR(MF, LR) &&       // No need to save LR.
485                        !FI->mustSaveTOC() &&        // No need to save TOC.
486                        !RegInfo->hasBasePointer(MF); // No special alignment.
487 
488   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
489   // code if all local vars are reg-allocated.
490   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
491 
492   // Check whether we can skip adjusting the stack pointer (by using red zone)
493   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
494     // No need for frame
495     return 0;
496   }
497 
498   // Get the maximum call frame size of all the calls.
499   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
500 
501   // Maximum call frame needs to be at least big enough for linkage area.
502   unsigned minCallFrameSize = getLinkageSize();
503   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
504 
505   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
506   // that allocations will be aligned.
507   if (MFI.hasVarSizedObjects())
508     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
509 
510   // Update the new max call frame size if the caller passes in a valid pointer.
511   if (NewMaxCallFrameSize)
512     *NewMaxCallFrameSize = maxCallFrameSize;
513 
514   // Include call frame size in total.
515   FrameSize += maxCallFrameSize;
516 
517   // Make sure the frame is aligned.
518   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
519 
520   return FrameSize;
521 }
522 
523 // hasFP - Return true if the specified function actually has a dedicated frame
524 // pointer register.
hasFP(const MachineFunction & MF) const525 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
526   const MachineFrameInfo &MFI = MF.getFrameInfo();
527   // FIXME: This is pretty much broken by design: hasFP() might be called really
528   // early, before the stack layout was calculated and thus hasFP() might return
529   // true or false here depending on the time of call.
530   return (MFI.getStackSize()) && needsFP(MF);
531 }
532 
533 // needsFP - Return true if the specified function should have a dedicated frame
534 // pointer register.  This is true if the function has variable sized allocas or
535 // if frame pointer elimination is disabled.
needsFP(const MachineFunction & MF) const536 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
537   const MachineFrameInfo &MFI = MF.getFrameInfo();
538 
539   // Naked functions have no stack frame pushed, so we don't have a frame
540   // pointer.
541   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
542     return false;
543 
544   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
545     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
546     (MF.getTarget().Options.GuaranteedTailCallOpt &&
547      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
548 }
549 
replaceFPWithRealFP(MachineFunction & MF) const550 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
551   bool is31 = needsFP(MF);
552   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
553   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
554 
555   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
556   bool HasBP = RegInfo->hasBasePointer(MF);
557   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
558   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
559 
560   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
561        BI != BE; ++BI)
562     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
563       --MBBI;
564       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
565         MachineOperand &MO = MBBI->getOperand(I);
566         if (!MO.isReg())
567           continue;
568 
569         switch (MO.getReg()) {
570         case PPC::FP:
571           MO.setReg(FPReg);
572           break;
573         case PPC::FP8:
574           MO.setReg(FP8Reg);
575           break;
576         case PPC::BP:
577           MO.setReg(BPReg);
578           break;
579         case PPC::BP8:
580           MO.setReg(BP8Reg);
581           break;
582 
583         }
584       }
585     }
586 }
587 
588 /*  This function will do the following:
589     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
590       respectively (defaults recommended by the ABI) and return true
591     - If MBB is not an entry block, initialize the register scavenger and look
592       for available registers.
593     - If the defaults (R0/R12) are available, return true
594     - If TwoUniqueRegsRequired is set to true, it looks for two unique
595       registers. Otherwise, look for a single available register.
596       - If the required registers are found, set SR1 and SR2 and return true.
597       - If the required registers are not found, set SR2 or both SR1 and SR2 to
598         PPC::NoRegister and return false.
599 
600     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
601     is not set, this function will attempt to find two different registers, but
602     still return true if only one register is available (and set SR1 == SR2).
603 */
604 bool
findScratchRegister(MachineBasicBlock * MBB,bool UseAtEnd,bool TwoUniqueRegsRequired,unsigned * SR1,unsigned * SR2) const605 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
606                                       bool UseAtEnd,
607                                       bool TwoUniqueRegsRequired,
608                                       unsigned *SR1,
609                                       unsigned *SR2) const {
610   RegScavenger RS;
611   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
612   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
613 
614   // Set the defaults for the two scratch registers.
615   if (SR1)
616     *SR1 = R0;
617 
618   if (SR2) {
619     assert (SR1 && "Asking for the second scratch register but not the first?");
620     *SR2 = R12;
621   }
622 
623   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
624   if ((UseAtEnd && MBB->isReturnBlock()) ||
625       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
626     return true;
627 
628   RS.enterBasicBlock(*MBB);
629 
630   if (UseAtEnd && !MBB->empty()) {
631     // The scratch register will be used at the end of the block, so must
632     // consider all registers used within the block
633 
634     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
635     // If no terminator, back iterator up to previous instruction.
636     if (MBBI == MBB->end())
637       MBBI = std::prev(MBBI);
638 
639     if (MBBI != MBB->begin())
640       RS.forward(MBBI);
641   }
642 
643   // If the two registers are available, we're all good.
644   // Note that we only return here if both R0 and R12 are available because
645   // although the function may not require two unique registers, it may benefit
646   // from having two so we should try to provide them.
647   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
648     return true;
649 
650   // Get the list of callee-saved registers for the target.
651   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
652   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
653 
654   // Get all the available registers in the block.
655   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
656                                      &PPC::GPRCRegClass);
657 
658   // We shouldn't use callee-saved registers as scratch registers as they may be
659   // available when looking for a candidate block for shrink wrapping but not
660   // available when the actual prologue/epilogue is being emitted because they
661   // were added as live-in to the prologue block by PrologueEpilogueInserter.
662   for (int i = 0; CSRegs[i]; ++i)
663     BV.reset(CSRegs[i]);
664 
665   // Set the first scratch register to the first available one.
666   if (SR1) {
667     int FirstScratchReg = BV.find_first();
668     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
669   }
670 
671   // If there is another one available, set the second scratch register to that.
672   // Otherwise, set it to either PPC::NoRegister if this function requires two
673   // or to whatever SR1 is set to if this function doesn't require two.
674   if (SR2) {
675     int SecondScratchReg = BV.find_next(*SR1);
676     if (SecondScratchReg != -1)
677       *SR2 = SecondScratchReg;
678     else
679       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
680   }
681 
682   // Now that we've done our best to provide both registers, double check
683   // whether we were unable to provide enough.
684   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
685     return false;
686 
687   return true;
688 }
689 
690 // We need a scratch register for spilling LR and for spilling CR. By default,
691 // we use two scratch registers to hide latency. However, if only one scratch
692 // register is available, we can adjust for that by not overlapping the spill
693 // code. However, if we need to realign the stack (i.e. have a base pointer)
694 // and the stack frame is large, we need two scratch registers.
695 bool
twoUniqueScratchRegsRequired(MachineBasicBlock * MBB) const696 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
697   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
698   MachineFunction &MF = *(MBB->getParent());
699   bool HasBP = RegInfo->hasBasePointer(MF);
700   unsigned FrameSize = determineFrameLayout(MF);
701   int NegFrameSize = -FrameSize;
702   bool IsLargeFrame = !isInt<16>(NegFrameSize);
703   MachineFrameInfo &MFI = MF.getFrameInfo();
704   unsigned MaxAlign = MFI.getMaxAlignment();
705   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
706 
707   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
708 }
709 
canUseAsPrologue(const MachineBasicBlock & MBB) const710 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
711   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
712 
713   return findScratchRegister(TmpMBB, false,
714                              twoUniqueScratchRegsRequired(TmpMBB));
715 }
716 
canUseAsEpilogue(const MachineBasicBlock & MBB) const717 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
718   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
719 
720   return findScratchRegister(TmpMBB, true);
721 }
722 
stackUpdateCanBeMoved(MachineFunction & MF) const723 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
724   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
725   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
726 
727   // Abort if there is no register info or function info.
728   if (!RegInfo || !FI)
729     return false;
730 
731   // Only move the stack update on ELFv2 ABI and PPC64.
732   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
733     return false;
734 
735   // Check the frame size first and return false if it does not fit the
736   // requirements.
737   // We need a non-zero frame size as well as a frame that will fit in the red
738   // zone. This is because by moving the stack pointer update we are now storing
739   // to the red zone until the stack pointer is updated. If we get an interrupt
740   // inside the prologue but before the stack update we now have a number of
741   // stores to the red zone and those stores must all fit.
742   MachineFrameInfo &MFI = MF.getFrameInfo();
743   unsigned FrameSize = MFI.getStackSize();
744   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
745     return false;
746 
747   // Frame pointers and base pointers complicate matters so don't do anything
748   // if we have them. For example having a frame pointer will sometimes require
749   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
750   // difficult.
751   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
752     return false;
753 
754   // Calls to fast_cc functions use different rules for passing parameters on
755   // the stack from the ABI and using PIC base in the function imposes
756   // similar restrictions to using the base pointer. It is not generally safe
757   // to move the stack pointer update in these situations.
758   if (FI->hasFastCall() || FI->usesPICBase())
759     return false;
760 
761   // Finally we can move the stack update if we do not require register
762   // scavenging. Register scavenging can introduce more spills and so
763   // may make the frame size larger than we have computed.
764   return !RegInfo->requiresFrameIndexScavenging(MF);
765 }
766 
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const767 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
768                                     MachineBasicBlock &MBB) const {
769   MachineBasicBlock::iterator MBBI = MBB.begin();
770   MachineFrameInfo &MFI = MF.getFrameInfo();
771   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
772   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
773 
774   MachineModuleInfo &MMI = MF.getMMI();
775   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
776   DebugLoc dl;
777   bool needsCFI = MMI.hasDebugInfo() ||
778     MF.getFunction().needsUnwindTableEntry();
779 
780   // Get processor type.
781   bool isPPC64 = Subtarget.isPPC64();
782   // Get the ABI.
783   bool isSVR4ABI = Subtarget.isSVR4ABI();
784   bool isELFv2ABI = Subtarget.isELFv2ABI();
785   assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
786          "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
787 
788   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
789   // process it.
790   if (!isSVR4ABI)
791     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
792       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
793         HandleVRSaveUpdate(*MBBI, TII);
794         break;
795       }
796     }
797 
798   // Move MBBI back to the beginning of the prologue block.
799   MBBI = MBB.begin();
800 
801   // Work out frame sizes.
802   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
803   int NegFrameSize = -FrameSize;
804   if (!isInt<32>(NegFrameSize))
805     llvm_unreachable("Unhandled stack size!");
806 
807   if (MFI.isFrameAddressTaken())
808     replaceFPWithRealFP(MF);
809 
810   // Check if the link register (LR) must be saved.
811   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
812   bool MustSaveLR = FI->mustSaveLR();
813   bool MustSaveTOC = FI->mustSaveTOC();
814   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
815   bool MustSaveCR = !MustSaveCRs.empty();
816   // Do we have a frame pointer and/or base pointer for this function?
817   bool HasFP = hasFP(MF);
818   bool HasBP = RegInfo->hasBasePointer(MF);
819   bool HasRedZone = isPPC64 || !isSVR4ABI;
820 
821   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
822   unsigned BPReg       = RegInfo->getBaseRegister(MF);
823   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
824   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
825   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
826   unsigned ScratchReg  = 0;
827   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
828   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
829   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
830                                                 : PPC::MFLR );
831   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
832                                                  : PPC::STW );
833   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
834                                                      : PPC::STWU );
835   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
836                                                         : PPC::STWUX);
837   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
838                                                           : PPC::LIS );
839   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
840                                                  : PPC::ORI );
841   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
842                                               : PPC::OR );
843   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
844                                                             : PPC::SUBFC);
845   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
846                                                                : PPC::SUBFIC);
847 
848   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
849   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
850   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
851   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
852   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
853          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
854 
855   // Using the same bool variable as below to suppress compiler warnings.
856   bool SingleScratchReg =
857     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
858                         &ScratchReg, &TempReg);
859   assert(SingleScratchReg &&
860          "Required number of registers not available in this block");
861 
862   SingleScratchReg = ScratchReg == TempReg;
863 
864   int LROffset = getReturnSaveOffset();
865 
866   int FPOffset = 0;
867   if (HasFP) {
868     if (isSVR4ABI) {
869       MachineFrameInfo &MFI = MF.getFrameInfo();
870       int FPIndex = FI->getFramePointerSaveIndex();
871       assert(FPIndex && "No Frame Pointer Save Slot!");
872       FPOffset = MFI.getObjectOffset(FPIndex);
873     } else {
874       FPOffset = getFramePointerSaveOffset();
875     }
876   }
877 
878   int BPOffset = 0;
879   if (HasBP) {
880     if (isSVR4ABI) {
881       MachineFrameInfo &MFI = MF.getFrameInfo();
882       int BPIndex = FI->getBasePointerSaveIndex();
883       assert(BPIndex && "No Base Pointer Save Slot!");
884       BPOffset = MFI.getObjectOffset(BPIndex);
885     } else {
886       BPOffset = getBasePointerSaveOffset();
887     }
888   }
889 
890   int PBPOffset = 0;
891   if (FI->usesPICBase()) {
892     MachineFrameInfo &MFI = MF.getFrameInfo();
893     int PBPIndex = FI->getPICBasePointerSaveIndex();
894     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
895     PBPOffset = MFI.getObjectOffset(PBPIndex);
896   }
897 
898   // Get stack alignments.
899   unsigned MaxAlign = MFI.getMaxAlignment();
900   if (HasBP && MaxAlign > 1)
901     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
902            "Invalid alignment!");
903 
904   // Frames of 32KB & larger require special handling because they cannot be
905   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
906   bool isLargeFrame = !isInt<16>(NegFrameSize);
907 
908   assert((isPPC64 || !MustSaveCR) &&
909          "Prologue CR saving supported only in 64-bit mode");
910 
911   // Check if we can move the stack update instruction (stdu) down the prologue
912   // past the callee saves. Hopefully this will avoid the situation where the
913   // saves are waiting for the update on the store with update to complete.
914   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
915   bool MovingStackUpdateDown = false;
916 
917   // Check if we can move the stack update.
918   if (stackUpdateCanBeMoved(MF)) {
919     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
920     for (CalleeSavedInfo CSI : Info) {
921       int FrIdx = CSI.getFrameIdx();
922       // If the frame index is not negative the callee saved info belongs to a
923       // stack object that is not a fixed stack object. We ignore non-fixed
924       // stack objects because we won't move the stack update pointer past them.
925       if (FrIdx >= 0)
926         continue;
927 
928       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
929         StackUpdateLoc++;
930         MovingStackUpdateDown = true;
931       } else {
932         // We need all of the Frame Indices to meet these conditions.
933         // If they do not, abort the whole operation.
934         StackUpdateLoc = MBBI;
935         MovingStackUpdateDown = false;
936         break;
937       }
938     }
939 
940     // If the operation was not aborted then update the object offset.
941     if (MovingStackUpdateDown) {
942       for (CalleeSavedInfo CSI : Info) {
943         int FrIdx = CSI.getFrameIdx();
944         if (FrIdx < 0)
945           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
946       }
947     }
948   }
949 
950   // If we need to spill the CR and the LR but we don't have two separate
951   // registers available, we must spill them one at a time
952   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
953     // In the ELFv2 ABI, we are not required to save all CR fields.
954     // If only one or two CR fields are clobbered, it is more efficient to use
955     // mfocrf to selectively save just those fields, because mfocrf has short
956     // latency compares to mfcr.
957     unsigned MfcrOpcode = PPC::MFCR8;
958     unsigned CrState = RegState::ImplicitKill;
959     if (isELFv2ABI && MustSaveCRs.size() == 1) {
960       MfcrOpcode = PPC::MFOCRF8;
961       CrState = RegState::Kill;
962     }
963     MachineInstrBuilder MIB =
964       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
965     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
966       MIB.addReg(MustSaveCRs[i], CrState);
967     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
968       .addReg(TempReg, getKillRegState(true))
969       .addImm(8)
970       .addReg(SPReg);
971   }
972 
973   if (MustSaveLR)
974     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
975 
976   if (MustSaveCR &&
977       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
978     // In the ELFv2 ABI, we are not required to save all CR fields.
979     // If only one or two CR fields are clobbered, it is more efficient to use
980     // mfocrf to selectively save just those fields, because mfocrf has short
981     // latency compares to mfcr.
982     unsigned MfcrOpcode = PPC::MFCR8;
983     unsigned CrState = RegState::ImplicitKill;
984     if (isELFv2ABI && MustSaveCRs.size() == 1) {
985       MfcrOpcode = PPC::MFOCRF8;
986       CrState = RegState::Kill;
987     }
988     MachineInstrBuilder MIB =
989       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
990     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
991       MIB.addReg(MustSaveCRs[i], CrState);
992   }
993 
994   if (HasRedZone) {
995     if (HasFP)
996       BuildMI(MBB, MBBI, dl, StoreInst)
997         .addReg(FPReg)
998         .addImm(FPOffset)
999         .addReg(SPReg);
1000     if (FI->usesPICBase())
1001       BuildMI(MBB, MBBI, dl, StoreInst)
1002         .addReg(PPC::R30)
1003         .addImm(PBPOffset)
1004         .addReg(SPReg);
1005     if (HasBP)
1006       BuildMI(MBB, MBBI, dl, StoreInst)
1007         .addReg(BPReg)
1008         .addImm(BPOffset)
1009         .addReg(SPReg);
1010   }
1011 
1012   if (MustSaveLR)
1013     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1014       .addReg(ScratchReg, getKillRegState(true))
1015       .addImm(LROffset)
1016       .addReg(SPReg);
1017 
1018   if (MustSaveCR &&
1019       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1020     assert(HasRedZone && "A red zone is always available on PPC64");
1021     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1022       .addReg(TempReg, getKillRegState(true))
1023       .addImm(8)
1024       .addReg(SPReg);
1025   }
1026 
1027   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1028   if (!FrameSize)
1029     return;
1030 
1031   // Adjust stack pointer: r1 += NegFrameSize.
1032   // If there is a preferred stack alignment, align R1 now
1033 
1034   if (HasBP && HasRedZone) {
1035     // Save a copy of r1 as the base pointer.
1036     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1037       .addReg(SPReg)
1038       .addReg(SPReg);
1039   }
1040 
1041   // Have we generated a STUX instruction to claim stack frame? If so,
1042   // the negated frame size will be placed in ScratchReg.
1043   bool HasSTUX = false;
1044 
1045   // This condition must be kept in sync with canUseAsPrologue.
1046   if (HasBP && MaxAlign > 1) {
1047     if (isPPC64)
1048       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1049         .addReg(SPReg)
1050         .addImm(0)
1051         .addImm(64 - Log2_32(MaxAlign));
1052     else // PPC32...
1053       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1054         .addReg(SPReg)
1055         .addImm(0)
1056         .addImm(32 - Log2_32(MaxAlign))
1057         .addImm(31);
1058     if (!isLargeFrame) {
1059       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1060         .addReg(ScratchReg, RegState::Kill)
1061         .addImm(NegFrameSize);
1062     } else {
1063       assert(!SingleScratchReg && "Only a single scratch reg available");
1064       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1065         .addImm(NegFrameSize >> 16);
1066       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1067         .addReg(TempReg, RegState::Kill)
1068         .addImm(NegFrameSize & 0xFFFF);
1069       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1070         .addReg(ScratchReg, RegState::Kill)
1071         .addReg(TempReg, RegState::Kill);
1072     }
1073 
1074     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1075       .addReg(SPReg, RegState::Kill)
1076       .addReg(SPReg)
1077       .addReg(ScratchReg);
1078     HasSTUX = true;
1079 
1080   } else if (!isLargeFrame) {
1081     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1082       .addReg(SPReg)
1083       .addImm(NegFrameSize)
1084       .addReg(SPReg);
1085 
1086   } else {
1087     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1088       .addImm(NegFrameSize >> 16);
1089     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1090       .addReg(ScratchReg, RegState::Kill)
1091       .addImm(NegFrameSize & 0xFFFF);
1092     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1093       .addReg(SPReg, RegState::Kill)
1094       .addReg(SPReg)
1095       .addReg(ScratchReg);
1096     HasSTUX = true;
1097   }
1098 
1099   // Save the TOC register after the stack pointer update if a prologue TOC
1100   // save is required for the function.
1101   if (MustSaveTOC) {
1102     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1103     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1104       .addReg(TOCReg, getKillRegState(true))
1105       .addImm(TOCSaveOffset)
1106       .addReg(SPReg);
1107   }
1108 
1109   if (!HasRedZone) {
1110     assert(!isPPC64 && "A red zone is always available on PPC64");
1111     if (HasSTUX) {
1112       // The negated frame size is in ScratchReg, and the SPReg has been
1113       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1114       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1115       // the stack frame (i.e. the old SP), ideally, we would put the old
1116       // SP into a register and use it as the base for the stores. The
1117       // problem is that the only available register may be ScratchReg,
1118       // which could be R0, and R0 cannot be used as a base address.
1119 
1120       // First, set ScratchReg to the old SP. This may need to be modified
1121       // later.
1122       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1123         .addReg(ScratchReg, RegState::Kill)
1124         .addReg(SPReg);
1125 
1126       if (ScratchReg == PPC::R0) {
1127         // R0 cannot be used as a base register, but it can be used as an
1128         // index in a store-indexed.
1129         int LastOffset = 0;
1130         if (HasFP)  {
1131           // R0 += (FPOffset-LastOffset).
1132           // Need addic, since addi treats R0 as 0.
1133           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1134             .addReg(ScratchReg)
1135             .addImm(FPOffset-LastOffset);
1136           LastOffset = FPOffset;
1137           // Store FP into *R0.
1138           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1139             .addReg(FPReg, RegState::Kill)  // Save FP.
1140             .addReg(PPC::ZERO)
1141             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1142         }
1143         if (FI->usesPICBase()) {
1144           // R0 += (PBPOffset-LastOffset).
1145           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1146             .addReg(ScratchReg)
1147             .addImm(PBPOffset-LastOffset);
1148           LastOffset = PBPOffset;
1149           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1150             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1151             .addReg(PPC::ZERO)
1152             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1153         }
1154         if (HasBP) {
1155           // R0 += (BPOffset-LastOffset).
1156           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1157             .addReg(ScratchReg)
1158             .addImm(BPOffset-LastOffset);
1159           LastOffset = BPOffset;
1160           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1161             .addReg(BPReg, RegState::Kill)  // Save BP.
1162             .addReg(PPC::ZERO)
1163             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1164           // BP = R0-LastOffset
1165           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1166             .addReg(ScratchReg, RegState::Kill)
1167             .addImm(-LastOffset);
1168         }
1169       } else {
1170         // ScratchReg is not R0, so use it as the base register. It is
1171         // already set to the old SP, so we can use the offsets directly.
1172 
1173         // Now that the stack frame has been allocated, save all the necessary
1174         // registers using ScratchReg as the base address.
1175         if (HasFP)
1176           BuildMI(MBB, MBBI, dl, StoreInst)
1177             .addReg(FPReg)
1178             .addImm(FPOffset)
1179             .addReg(ScratchReg);
1180         if (FI->usesPICBase())
1181           BuildMI(MBB, MBBI, dl, StoreInst)
1182             .addReg(PPC::R30)
1183             .addImm(PBPOffset)
1184             .addReg(ScratchReg);
1185         if (HasBP) {
1186           BuildMI(MBB, MBBI, dl, StoreInst)
1187             .addReg(BPReg)
1188             .addImm(BPOffset)
1189             .addReg(ScratchReg);
1190           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1191             .addReg(ScratchReg, RegState::Kill)
1192             .addReg(ScratchReg);
1193         }
1194       }
1195     } else {
1196       // The frame size is a known 16-bit constant (fitting in the immediate
1197       // field of STWU). To be here we have to be compiling for PPC32.
1198       // Since the SPReg has been decreased by FrameSize, add it back to each
1199       // offset.
1200       if (HasFP)
1201         BuildMI(MBB, MBBI, dl, StoreInst)
1202           .addReg(FPReg)
1203           .addImm(FrameSize + FPOffset)
1204           .addReg(SPReg);
1205       if (FI->usesPICBase())
1206         BuildMI(MBB, MBBI, dl, StoreInst)
1207           .addReg(PPC::R30)
1208           .addImm(FrameSize + PBPOffset)
1209           .addReg(SPReg);
1210       if (HasBP) {
1211         BuildMI(MBB, MBBI, dl, StoreInst)
1212           .addReg(BPReg)
1213           .addImm(FrameSize + BPOffset)
1214           .addReg(SPReg);
1215         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1216           .addReg(SPReg)
1217           .addImm(FrameSize);
1218       }
1219     }
1220   }
1221 
1222   // Add Call Frame Information for the instructions we generated above.
1223   if (needsCFI) {
1224     unsigned CFIIndex;
1225 
1226     if (HasBP) {
1227       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1228       // because if the stack needed aligning then CFA won't be at a fixed
1229       // offset from FP/SP.
1230       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1231       CFIIndex = MF.addFrameInst(
1232           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1233     } else {
1234       // Adjust the definition of CFA to account for the change in SP.
1235       assert(NegFrameSize);
1236       CFIIndex = MF.addFrameInst(
1237           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1238     }
1239     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1240         .addCFIIndex(CFIIndex);
1241 
1242     if (HasFP) {
1243       // Describe where FP was saved, at a fixed offset from CFA.
1244       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1245       CFIIndex = MF.addFrameInst(
1246           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1247       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1248           .addCFIIndex(CFIIndex);
1249     }
1250 
1251     if (FI->usesPICBase()) {
1252       // Describe where FP was saved, at a fixed offset from CFA.
1253       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1254       CFIIndex = MF.addFrameInst(
1255           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1256       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1257           .addCFIIndex(CFIIndex);
1258     }
1259 
1260     if (HasBP) {
1261       // Describe where BP was saved, at a fixed offset from CFA.
1262       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1263       CFIIndex = MF.addFrameInst(
1264           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1265       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1266           .addCFIIndex(CFIIndex);
1267     }
1268 
1269     if (MustSaveLR) {
1270       // Describe where LR was saved, at a fixed offset from CFA.
1271       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1272       CFIIndex = MF.addFrameInst(
1273           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1274       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1275           .addCFIIndex(CFIIndex);
1276     }
1277   }
1278 
1279   // If there is a frame pointer, copy R1 into R31
1280   if (HasFP) {
1281     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1282       .addReg(SPReg)
1283       .addReg(SPReg);
1284 
1285     if (!HasBP && needsCFI) {
1286       // Change the definition of CFA from SP+offset to FP+offset, because SP
1287       // will change at every alloca.
1288       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1289       unsigned CFIIndex = MF.addFrameInst(
1290           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1291 
1292       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1293           .addCFIIndex(CFIIndex);
1294     }
1295   }
1296 
1297   if (needsCFI) {
1298     // Describe where callee saved registers were saved, at fixed offsets from
1299     // CFA.
1300     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1301     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1302       unsigned Reg = CSI[I].getReg();
1303       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1304 
1305       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1306       // subregisters of CR2. We just need to emit a move of CR2.
1307       if (PPC::CRBITRCRegClass.contains(Reg))
1308         continue;
1309 
1310       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1311         continue;
1312 
1313       // For SVR4, don't emit a move for the CR spill slot if we haven't
1314       // spilled CRs.
1315       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1316           && !MustSaveCR)
1317         continue;
1318 
1319       // For 64-bit SVR4 when we have spilled CRs, the spill location
1320       // is SP+8, not a frame-relative slot.
1321       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1322         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1323         // the whole CR word.  In the ELFv2 ABI, every CR that was
1324         // actually saved gets its own CFI record.
1325         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1326         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1327             nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
1328         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1329             .addCFIIndex(CFIIndex);
1330         continue;
1331       }
1332 
1333       if (CSI[I].isSpilledToReg()) {
1334         unsigned SpilledReg = CSI[I].getDstReg();
1335         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1336             nullptr, MRI->getDwarfRegNum(Reg, true),
1337             MRI->getDwarfRegNum(SpilledReg, true)));
1338         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1339           .addCFIIndex(CFIRegister);
1340       } else {
1341         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1342         // We have changed the object offset above but we do not want to change
1343         // the actual offsets in the CFI instruction so we have to undo the
1344         // offset change here.
1345         if (MovingStackUpdateDown)
1346           Offset -= NegFrameSize;
1347 
1348         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1349             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1350         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1351             .addCFIIndex(CFIIndex);
1352       }
1353     }
1354   }
1355 }
1356 
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1357 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1358                                     MachineBasicBlock &MBB) const {
1359   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1360   DebugLoc dl;
1361 
1362   if (MBBI != MBB.end())
1363     dl = MBBI->getDebugLoc();
1364 
1365   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1366   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1367 
1368   // Get alignment info so we know how to restore the SP.
1369   const MachineFrameInfo &MFI = MF.getFrameInfo();
1370 
1371   // Get the number of bytes allocated from the FrameInfo.
1372   int FrameSize = MFI.getStackSize();
1373 
1374   // Get processor type.
1375   bool isPPC64 = Subtarget.isPPC64();
1376   // Get the ABI.
1377   bool isSVR4ABI = Subtarget.isSVR4ABI();
1378 
1379   // Check if the link register (LR) has been saved.
1380   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1381   bool MustSaveLR = FI->mustSaveLR();
1382   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1383   bool MustSaveCR = !MustSaveCRs.empty();
1384   // Do we have a frame pointer and/or base pointer for this function?
1385   bool HasFP = hasFP(MF);
1386   bool HasBP = RegInfo->hasBasePointer(MF);
1387   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1388 
1389   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1390   unsigned BPReg      = RegInfo->getBaseRegister(MF);
1391   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1392   unsigned ScratchReg = 0;
1393   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1394   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1395                                                  : PPC::MTLR );
1396   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1397                                                  : PPC::LWZ );
1398   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1399                                                            : PPC::LIS );
1400   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1401                                               : PPC::OR );
1402   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1403                                                   : PPC::ORI );
1404   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1405                                                    : PPC::ADDI );
1406   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1407                                                 : PPC::ADD4 );
1408 
1409   int LROffset = getReturnSaveOffset();
1410 
1411   int FPOffset = 0;
1412 
1413   // Using the same bool variable as below to suppress compiler warnings.
1414   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1415                                               &TempReg);
1416   assert(SingleScratchReg &&
1417          "Could not find an available scratch register");
1418 
1419   SingleScratchReg = ScratchReg == TempReg;
1420 
1421   if (HasFP) {
1422     if (isSVR4ABI) {
1423       int FPIndex = FI->getFramePointerSaveIndex();
1424       assert(FPIndex && "No Frame Pointer Save Slot!");
1425       FPOffset = MFI.getObjectOffset(FPIndex);
1426     } else {
1427       FPOffset = getFramePointerSaveOffset();
1428     }
1429   }
1430 
1431   int BPOffset = 0;
1432   if (HasBP) {
1433     if (isSVR4ABI) {
1434       int BPIndex = FI->getBasePointerSaveIndex();
1435       assert(BPIndex && "No Base Pointer Save Slot!");
1436       BPOffset = MFI.getObjectOffset(BPIndex);
1437     } else {
1438       BPOffset = getBasePointerSaveOffset();
1439     }
1440   }
1441 
1442   int PBPOffset = 0;
1443   if (FI->usesPICBase()) {
1444     int PBPIndex = FI->getPICBasePointerSaveIndex();
1445     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1446     PBPOffset = MFI.getObjectOffset(PBPIndex);
1447   }
1448 
1449   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1450 
1451   if (IsReturnBlock) {
1452     unsigned RetOpcode = MBBI->getOpcode();
1453     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1454                       RetOpcode == PPC::TCRETURNdi ||
1455                       RetOpcode == PPC::TCRETURNai ||
1456                       RetOpcode == PPC::TCRETURNri8 ||
1457                       RetOpcode == PPC::TCRETURNdi8 ||
1458                       RetOpcode == PPC::TCRETURNai8;
1459 
1460     if (UsesTCRet) {
1461       int MaxTCRetDelta = FI->getTailCallSPDelta();
1462       MachineOperand &StackAdjust = MBBI->getOperand(1);
1463       assert(StackAdjust.isImm() && "Expecting immediate value.");
1464       // Adjust stack pointer.
1465       int StackAdj = StackAdjust.getImm();
1466       int Delta = StackAdj - MaxTCRetDelta;
1467       assert((Delta >= 0) && "Delta must be positive");
1468       if (MaxTCRetDelta>0)
1469         FrameSize += (StackAdj +Delta);
1470       else
1471         FrameSize += StackAdj;
1472     }
1473   }
1474 
1475   // Frames of 32KB & larger require special handling because they cannot be
1476   // indexed into with a simple LD/LWZ immediate offset operand.
1477   bool isLargeFrame = !isInt<16>(FrameSize);
1478 
1479   // On targets without red zone, the SP needs to be restored last, so that
1480   // all live contents of the stack frame are upwards of the SP. This means
1481   // that we cannot restore SP just now, since there may be more registers
1482   // to restore from the stack frame (e.g. R31). If the frame size is not
1483   // a simple immediate value, we will need a spare register to hold the
1484   // restored SP. If the frame size is known and small, we can simply adjust
1485   // the offsets of the registers to be restored, and still use SP to restore
1486   // them. In such case, the final update of SP will be to add the frame
1487   // size to it.
1488   // To simplify the code, set RBReg to the base register used to restore
1489   // values from the stack, and set SPAdd to the value that needs to be added
1490   // to the SP at the end. The default values are as if red zone was present.
1491   unsigned RBReg = SPReg;
1492   unsigned SPAdd = 0;
1493 
1494   // Check if we can move the stack update instruction up the epilogue
1495   // past the callee saves. This will allow the move to LR instruction
1496   // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
1498   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1499   if (stackUpdateCanBeMoved(MF)) {
1500     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1501     for (CalleeSavedInfo CSI : Info) {
1502       int FrIdx = CSI.getFrameIdx();
1503       // If the frame index is not negative the callee saved info belongs to a
1504       // stack object that is not a fixed stack object. We ignore non-fixed
1505       // stack objects because we won't move the update of the stack pointer
1506       // past them.
1507       if (FrIdx >= 0)
1508         continue;
1509 
1510       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1511         StackUpdateLoc--;
1512       else {
1513         // Abort the operation as we can't update all CSR restores.
1514         StackUpdateLoc = MBBI;
1515         break;
1516       }
1517     }
1518   }
1519 
1520   if (FrameSize) {
1521     // In the prologue, the loaded (or persistent) stack pointer value is
1522     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1523     // zone add this offset back now.
1524 
1525     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1526     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1527     // call which invalidates the stack pointer value in SP(0). So we use the
1528     // value of R31 in this case.
1529     if (FI->hasFastCall()) {
1530       assert(HasFP && "Expecting a valid frame pointer.");
1531       if (!HasRedZone)
1532         RBReg = FPReg;
1533       if (!isLargeFrame) {
1534         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1535           .addReg(FPReg).addImm(FrameSize);
1536       } else {
1537         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1538           .addImm(FrameSize >> 16);
1539         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1540           .addReg(ScratchReg, RegState::Kill)
1541           .addImm(FrameSize & 0xFFFF);
1542         BuildMI(MBB, MBBI, dl, AddInst)
1543           .addReg(RBReg)
1544           .addReg(FPReg)
1545           .addReg(ScratchReg);
1546       }
1547     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1548       if (HasRedZone) {
1549         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1550           .addReg(SPReg)
1551           .addImm(FrameSize);
1552       } else {
1553         // Make sure that adding FrameSize will not overflow the max offset
1554         // size.
1555         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1556                "Local offsets should be negative");
1557         SPAdd = FrameSize;
1558         FPOffset += FrameSize;
1559         BPOffset += FrameSize;
1560         PBPOffset += FrameSize;
1561       }
1562     } else {
1563       // We don't want to use ScratchReg as a base register, because it
1564       // could happen to be R0. Use FP instead, but make sure to preserve it.
1565       if (!HasRedZone) {
1566         // If FP is not saved, copy it to ScratchReg.
1567         if (!HasFP)
1568           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1569             .addReg(FPReg)
1570             .addReg(FPReg);
1571         RBReg = FPReg;
1572       }
1573       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1574         .addImm(0)
1575         .addReg(SPReg);
1576     }
1577   }
1578   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1579   // If there is no red zone, ScratchReg may be needed for holding a useful
1580   // value (although not the base register). Make sure it is not overwritten
1581   // too early.
1582 
1583   assert((isPPC64 || !MustSaveCR) &&
1584          "Epilogue CR restoring supported only in 64-bit mode");
1585 
1586   // If we need to restore both the LR and the CR and we only have one
1587   // available scratch register, we must do them one at a time.
1588   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1589     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1590     // is live here.
1591     assert(HasRedZone && "Expecting red zone");
1592     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1593       .addImm(8)
1594       .addReg(SPReg);
1595     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1596       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1597         .addReg(TempReg, getKillRegState(i == e-1));
1598   }
1599 
1600   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1601   // LR is stored in the caller's stack frame. ScratchReg will be needed
1602   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1603   // a base register anyway, because it may happen to be R0.
1604   bool LoadedLR = false;
1605   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1606     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1607       .addImm(LROffset+SPAdd)
1608       .addReg(RBReg);
1609     LoadedLR = true;
1610   }
1611 
1612   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1613     // This will only occur for PPC64.
1614     assert(isPPC64 && "Expecting 64-bit mode");
1615     assert(RBReg == SPReg && "Should be using SP as a base register");
1616     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1617       .addImm(8)
1618       .addReg(RBReg);
1619   }
1620 
1621   if (HasFP) {
1622     // If there is red zone, restore FP directly, since SP has already been
1623     // restored. Otherwise, restore the value of FP into ScratchReg.
1624     if (HasRedZone || RBReg == SPReg)
1625       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1626         .addImm(FPOffset)
1627         .addReg(SPReg);
1628     else
1629       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1630         .addImm(FPOffset)
1631         .addReg(RBReg);
1632   }
1633 
1634   if (FI->usesPICBase())
1635     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1636       .addImm(PBPOffset)
1637       .addReg(RBReg);
1638 
1639   if (HasBP)
1640     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1641       .addImm(BPOffset)
1642       .addReg(RBReg);
1643 
1644   // There is nothing more to be loaded from the stack, so now we can
1645   // restore SP: SP = RBReg + SPAdd.
1646   if (RBReg != SPReg || SPAdd != 0) {
1647     assert(!HasRedZone && "This should not happen with red zone");
1648     // If SPAdd is 0, generate a copy.
1649     if (SPAdd == 0)
1650       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1651         .addReg(RBReg)
1652         .addReg(RBReg);
1653     else
1654       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1655         .addReg(RBReg)
1656         .addImm(SPAdd);
1657 
1658     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1659     if (RBReg == FPReg)
1660       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1661         .addReg(ScratchReg)
1662         .addReg(ScratchReg);
1663 
1664     // Now load the LR from the caller's stack frame.
1665     if (MustSaveLR && !LoadedLR)
1666       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1667         .addImm(LROffset)
1668         .addReg(SPReg);
1669   }
1670 
1671   if (MustSaveCR &&
1672       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1673     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1674       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1675         .addReg(TempReg, getKillRegState(i == e-1));
1676 
1677   if (MustSaveLR)
1678     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1679 
1680   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1681   // call optimization
1682   if (IsReturnBlock) {
1683     unsigned RetOpcode = MBBI->getOpcode();
1684     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1685         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1686         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1687       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1688       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1689 
1690       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1691         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1692           .addReg(SPReg).addImm(CallerAllocatedAmt);
1693       } else {
1694         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1695           .addImm(CallerAllocatedAmt >> 16);
1696         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1697           .addReg(ScratchReg, RegState::Kill)
1698           .addImm(CallerAllocatedAmt & 0xFFFF);
1699         BuildMI(MBB, MBBI, dl, AddInst)
1700           .addReg(SPReg)
1701           .addReg(FPReg)
1702           .addReg(ScratchReg);
1703       }
1704     } else {
1705       createTailCallBranchInstr(MBB);
1706     }
1707   }
1708 }
1709 
createTailCallBranchInstr(MachineBasicBlock & MBB) const1710 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1711   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1712 
1713   // If we got this far a first terminator should exist.
1714   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1715 
1716   DebugLoc dl = MBBI->getDebugLoc();
1717   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1718 
1719   // Create branch instruction for pseudo tail call return instruction
1720   unsigned RetOpcode = MBBI->getOpcode();
1721   if (RetOpcode == PPC::TCRETURNdi) {
1722     MBBI = MBB.getLastNonDebugInstr();
1723     MachineOperand &JumpTarget = MBBI->getOperand(0);
1724     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1725       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1726   } else if (RetOpcode == PPC::TCRETURNri) {
1727     MBBI = MBB.getLastNonDebugInstr();
1728     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1729     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1730   } else if (RetOpcode == PPC::TCRETURNai) {
1731     MBBI = MBB.getLastNonDebugInstr();
1732     MachineOperand &JumpTarget = MBBI->getOperand(0);
1733     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1734   } else if (RetOpcode == PPC::TCRETURNdi8) {
1735     MBBI = MBB.getLastNonDebugInstr();
1736     MachineOperand &JumpTarget = MBBI->getOperand(0);
1737     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1738       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1739   } else if (RetOpcode == PPC::TCRETURNri8) {
1740     MBBI = MBB.getLastNonDebugInstr();
1741     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1742     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1743   } else if (RetOpcode == PPC::TCRETURNai8) {
1744     MBBI = MBB.getLastNonDebugInstr();
1745     MachineOperand &JumpTarget = MBBI->getOperand(0);
1746     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1747   }
1748 }
1749 
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const1750 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1751                                             BitVector &SavedRegs,
1752                                             RegScavenger *RS) const {
1753   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1754 
1755   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1756 
1757   //  Save and clear the LR state.
1758   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1759   unsigned LR = RegInfo->getRARegister();
1760   FI->setMustSaveLR(MustSaveLR(MF, LR));
1761   SavedRegs.reset(LR);
1762 
1763   //  Save R31 if necessary
1764   int FPSI = FI->getFramePointerSaveIndex();
1765   bool isPPC64 = Subtarget.isPPC64();
1766   bool isDarwinABI  = Subtarget.isDarwinABI();
1767   MachineFrameInfo &MFI = MF.getFrameInfo();
1768 
1769   // If the frame pointer save index hasn't been defined yet.
1770   if (!FPSI && needsFP(MF)) {
1771     // Find out what the fix offset of the frame pointer save area.
1772     int FPOffset = getFramePointerSaveOffset();
1773     // Allocate the frame index for frame pointer save area.
1774     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1775     // Save the result.
1776     FI->setFramePointerSaveIndex(FPSI);
1777   }
1778 
1779   int BPSI = FI->getBasePointerSaveIndex();
1780   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1781     int BPOffset = getBasePointerSaveOffset();
1782     // Allocate the frame index for the base pointer save area.
1783     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1784     // Save the result.
1785     FI->setBasePointerSaveIndex(BPSI);
1786   }
1787 
1788   // Reserve stack space for the PIC Base register (R30).
1789   // Only used in SVR4 32-bit.
1790   if (FI->usesPICBase()) {
1791     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1792     FI->setPICBasePointerSaveIndex(PBPSI);
1793   }
1794 
1795   // Make sure we don't explicitly spill r31, because, for example, we have
1796   // some inline asm which explicitly clobbers it, when we otherwise have a
1797   // frame pointer and are using r31's spill slot for the prologue/epilogue
1798   // code. Same goes for the base pointer and the PIC base register.
1799   if (needsFP(MF))
1800     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1801   if (RegInfo->hasBasePointer(MF))
1802     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1803   if (FI->usesPICBase())
1804     SavedRegs.reset(PPC::R30);
1805 
1806   // Reserve stack space to move the linkage area to in case of a tail call.
1807   int TCSPDelta = 0;
1808   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1809       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1810     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1811   }
1812 
1813   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1814   // function uses CR 2, 3, or 4.
1815   if (!isPPC64 && !isDarwinABI &&
1816       (SavedRegs.test(PPC::CR2) ||
1817        SavedRegs.test(PPC::CR3) ||
1818        SavedRegs.test(PPC::CR4))) {
1819     int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1820     FI->setCRSpillFrameIndex(FrameIdx);
1821   }
1822 }
1823 
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const1824 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1825                                                        RegScavenger *RS) const {
1826   // Early exit if not using the SVR4 ABI.
1827   if (!Subtarget.isSVR4ABI()) {
1828     addScavengingSpillSlot(MF, RS);
1829     return;
1830   }
1831 
1832   // Get callee saved register information.
1833   MachineFrameInfo &MFI = MF.getFrameInfo();
1834   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1835 
1836   // If the function is shrink-wrapped, and if the function has a tail call, the
1837   // tail call might not be in the new RestoreBlock, so real branch instruction
1838   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1839   // RestoreBlock. So we handle this case here.
1840   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1841     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1842     for (MachineBasicBlock &MBB : MF) {
1843       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1844         createTailCallBranchInstr(MBB);
1845     }
1846   }
1847 
1848   // Early exit if no callee saved registers are modified!
1849   if (CSI.empty() && !needsFP(MF)) {
1850     addScavengingSpillSlot(MF, RS);
1851     return;
1852   }
1853 
1854   unsigned MinGPR = PPC::R31;
1855   unsigned MinG8R = PPC::X31;
1856   unsigned MinFPR = PPC::F31;
1857   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1858 
1859   bool HasGPSaveArea = false;
1860   bool HasG8SaveArea = false;
1861   bool HasFPSaveArea = false;
1862   bool HasVRSAVESaveArea = false;
1863   bool HasVRSaveArea = false;
1864 
1865   SmallVector<CalleeSavedInfo, 18> GPRegs;
1866   SmallVector<CalleeSavedInfo, 18> G8Regs;
1867   SmallVector<CalleeSavedInfo, 18> FPRegs;
1868   SmallVector<CalleeSavedInfo, 18> VRegs;
1869 
1870   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1871     unsigned Reg = CSI[i].getReg();
1872     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1873             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1874            "Not expecting to try to spill R2 in a function that must save TOC");
1875     if (PPC::GPRCRegClass.contains(Reg) ||
1876         PPC::SPE4RCRegClass.contains(Reg)) {
1877       HasGPSaveArea = true;
1878 
1879       GPRegs.push_back(CSI[i]);
1880 
1881       if (Reg < MinGPR) {
1882         MinGPR = Reg;
1883       }
1884     } else if (PPC::G8RCRegClass.contains(Reg)) {
1885       HasG8SaveArea = true;
1886 
1887       G8Regs.push_back(CSI[i]);
1888 
1889       if (Reg < MinG8R) {
1890         MinG8R = Reg;
1891       }
1892     } else if (PPC::F8RCRegClass.contains(Reg)) {
1893       HasFPSaveArea = true;
1894 
1895       FPRegs.push_back(CSI[i]);
1896 
1897       if (Reg < MinFPR) {
1898         MinFPR = Reg;
1899       }
1900     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1901                PPC::CRRCRegClass.contains(Reg)) {
1902       ; // do nothing, as we already know whether CRs are spilled
1903     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1904       HasVRSAVESaveArea = true;
1905     } else if (PPC::VRRCRegClass.contains(Reg) ||
1906                PPC::SPERCRegClass.contains(Reg)) {
1907       // Altivec and SPE are mutually exclusive, but have the same stack
1908       // alignment requirements, so overload the save area for both cases.
1909       HasVRSaveArea = true;
1910 
1911       VRegs.push_back(CSI[i]);
1912 
1913       if (Reg < MinVR) {
1914         MinVR = Reg;
1915       }
1916     } else {
1917       llvm_unreachable("Unknown RegisterClass!");
1918     }
1919   }
1920 
1921   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1922   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1923 
1924   int64_t LowerBound = 0;
1925 
1926   // Take into account stack space reserved for tail calls.
1927   int TCSPDelta = 0;
1928   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1929       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1930     LowerBound = TCSPDelta;
1931   }
1932 
1933   // The Floating-point register save area is right below the back chain word
1934   // of the previous stack frame.
1935   if (HasFPSaveArea) {
1936     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1937       int FI = FPRegs[i].getFrameIdx();
1938 
1939       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1940     }
1941 
1942     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1943   }
1944 
1945   // Check whether the frame pointer register is allocated. If so, make sure it
1946   // is spilled to the correct offset.
1947   if (needsFP(MF)) {
1948     int FI = PFI->getFramePointerSaveIndex();
1949     assert(FI && "No Frame Pointer Save Slot!");
1950     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1951     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1952     HasGPSaveArea = true;
1953   }
1954 
1955   if (PFI->usesPICBase()) {
1956     int FI = PFI->getPICBasePointerSaveIndex();
1957     assert(FI && "No PIC Base Pointer Save Slot!");
1958     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1959 
1960     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1961     HasGPSaveArea = true;
1962   }
1963 
1964   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1965   if (RegInfo->hasBasePointer(MF)) {
1966     int FI = PFI->getBasePointerSaveIndex();
1967     assert(FI && "No Base Pointer Save Slot!");
1968     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1969 
1970     unsigned BP = RegInfo->getBaseRegister(MF);
1971     if (PPC::G8RCRegClass.contains(BP)) {
1972       MinG8R = std::min<unsigned>(MinG8R, BP);
1973       HasG8SaveArea = true;
1974     } else if (PPC::GPRCRegClass.contains(BP)) {
1975       MinGPR = std::min<unsigned>(MinGPR, BP);
1976       HasGPSaveArea = true;
1977     }
1978   }
1979 
1980   // General register save area starts right below the Floating-point
1981   // register save area.
1982   if (HasGPSaveArea || HasG8SaveArea) {
1983     // Move general register save area spill slots down, taking into account
1984     // the size of the Floating-point register save area.
1985     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1986       if (!GPRegs[i].isSpilledToReg()) {
1987         int FI = GPRegs[i].getFrameIdx();
1988         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1989       }
1990     }
1991 
1992     // Move general register save area spill slots down, taking into account
1993     // the size of the Floating-point register save area.
1994     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1995       if (!G8Regs[i].isSpilledToReg()) {
1996         int FI = G8Regs[i].getFrameIdx();
1997         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1998       }
1999     }
2000 
2001     unsigned MinReg =
2002       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2003                          TRI->getEncodingValue(MinG8R));
2004 
2005     if (Subtarget.isPPC64()) {
2006       LowerBound -= (31 - MinReg + 1) * 8;
2007     } else {
2008       LowerBound -= (31 - MinReg + 1) * 4;
2009     }
2010   }
2011 
2012   // For 32-bit only, the CR save area is below the general register
2013   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2014   // to the stack pointer and hence does not need an adjustment here.
2015   // Only CR2 (the first nonvolatile spilled) has an associated frame
2016   // index so that we have a single uniform save area.
2017   if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
2018     // Adjust the frame index of the CR spill slot.
2019     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2020       unsigned Reg = CSI[i].getReg();
2021 
2022       if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2023           // Leave Darwin logic as-is.
2024           || (!Subtarget.isSVR4ABI() &&
2025               (PPC::CRBITRCRegClass.contains(Reg) ||
2026                PPC::CRRCRegClass.contains(Reg)))) {
2027         int FI = CSI[i].getFrameIdx();
2028 
2029         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2030       }
2031     }
2032 
2033     LowerBound -= 4; // The CR save area is always 4 bytes long.
2034   }
2035 
2036   if (HasVRSAVESaveArea) {
2037     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2038     //             which have the VRSAVE register class?
2039     // Adjust the frame index of the VRSAVE spill slot.
2040     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2041       unsigned Reg = CSI[i].getReg();
2042 
2043       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2044         int FI = CSI[i].getFrameIdx();
2045 
2046         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2047       }
2048     }
2049 
2050     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2051   }
2052 
2053   // Both Altivec and SPE have the same alignment and padding requirements
2054   // within the stack frame.
2055   if (HasVRSaveArea) {
2056     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2057     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2058     // we are using negative number here (the stack grows downward). We should
2059     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2060     // is the alignment size ( n = 16 here) and y is the size after aligning.
2061     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2062     LowerBound &= ~(15);
2063 
2064     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2065       int FI = VRegs[i].getFrameIdx();
2066 
2067       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2068     }
2069   }
2070 
2071   addScavengingSpillSlot(MF, RS);
2072 }
2073 
2074 void
addScavengingSpillSlot(MachineFunction & MF,RegScavenger * RS) const2075 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2076                                          RegScavenger *RS) const {
2077   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2078   // a large stack, which will require scavenging a register to materialize a
2079   // large offset.
2080 
2081   // We need to have a scavenger spill slot for spills if the frame size is
2082   // large. In case there is no free register for large-offset addressing,
2083   // this slot is used for the necessary emergency spill. Also, we need the
2084   // slot for dynamic stack allocations.
2085 
2086   // The scavenger might be invoked if the frame offset does not fit into
2087   // the 16-bit immediate. We don't know the complete frame size here
2088   // because we've not yet computed callee-saved register spills or the
2089   // needed alignment padding.
2090   unsigned StackSize = determineFrameLayout(MF, true);
2091   MachineFrameInfo &MFI = MF.getFrameInfo();
2092   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2093       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2094     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2095     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2096     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2097     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2098     unsigned Size = TRI.getSpillSize(RC);
2099     unsigned Align = TRI.getSpillAlignment(RC);
2100     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2101 
2102     // Might we have over-aligned allocas?
2103     bool HasAlVars = MFI.hasVarSizedObjects() &&
2104                      MFI.getMaxAlignment() > getStackAlignment();
2105 
2106     // These kinds of spills might need two registers.
2107     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2108       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2109 
2110   }
2111 }
2112 
2113 // This function checks if a callee saved gpr can be spilled to a volatile
2114 // vector register. This occurs for leaf functions when the option
2115 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2116 // which were not spilled to vectors, return false so the target independent
2117 // code can handle them by assigning a FrameIdx to a stack slot.
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const2118 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2119     MachineFunction &MF, const TargetRegisterInfo *TRI,
2120     std::vector<CalleeSavedInfo> &CSI) const {
2121 
2122   if (CSI.empty())
2123     return true; // Early exit if no callee saved registers are modified!
2124 
2125   // Early exit if cannot spill gprs to volatile vector registers.
2126   MachineFrameInfo &MFI = MF.getFrameInfo();
2127   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2128     return false;
2129 
2130   // Build a BitVector of VSRs that can be used for spilling GPRs.
2131   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2132   BitVector BVCalleeSaved(TRI->getNumRegs());
2133   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2134   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2135   for (unsigned i = 0; CSRegs[i]; ++i)
2136     BVCalleeSaved.set(CSRegs[i]);
2137 
2138   for (unsigned Reg : BVAllocatable.set_bits()) {
2139     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2140     // used in the function.
2141     if (BVCalleeSaved[Reg] ||
2142         (!PPC::F8RCRegClass.contains(Reg) &&
2143          !PPC::VFRCRegClass.contains(Reg)) ||
2144         (MF.getRegInfo().isPhysRegUsed(Reg)))
2145       BVAllocatable.reset(Reg);
2146   }
2147 
2148   bool AllSpilledToReg = true;
2149   for (auto &CS : CSI) {
2150     if (BVAllocatable.none())
2151       return false;
2152 
2153     unsigned Reg = CS.getReg();
2154     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2155       AllSpilledToReg = false;
2156       continue;
2157     }
2158 
2159     unsigned VolatileVFReg = BVAllocatable.find_first();
2160     if (VolatileVFReg < BVAllocatable.size()) {
2161       CS.setDstReg(VolatileVFReg);
2162       BVAllocatable.reset(VolatileVFReg);
2163     } else {
2164       AllSpilledToReg = false;
2165     }
2166   }
2167   return AllSpilledToReg;
2168 }
2169 
2170 
2171 bool
spillCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const std::vector<CalleeSavedInfo> & CSI,const TargetRegisterInfo * TRI) const2172 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2173                                      MachineBasicBlock::iterator MI,
2174                                      const std::vector<CalleeSavedInfo> &CSI,
2175                                      const TargetRegisterInfo *TRI) const {
2176 
2177   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2178   // Return false otherwise to maintain pre-existing behavior.
2179   if (!Subtarget.isSVR4ABI())
2180     return false;
2181 
2182   MachineFunction *MF = MBB.getParent();
2183   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2184   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2185   bool MustSaveTOC = FI->mustSaveTOC();
2186   DebugLoc DL;
2187   bool CRSpilled = false;
2188   MachineInstrBuilder CRMIB;
2189 
2190   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2191     unsigned Reg = CSI[i].getReg();
2192     // Only Darwin actually uses the VRSAVE register, but it can still appear
2193     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2194     // Darwin, ignore it.
2195     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2196       continue;
2197 
2198     // CR2 through CR4 are the nonvolatile CR fields.
2199     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2200 
2201     // Add the callee-saved register as live-in; it's killed at the spill.
2202     // Do not do this for callee-saved registers that are live-in to the
2203     // function because they will already be marked live-in and this will be
2204     // adding it for a second time. It is an error to add the same register
2205     // to the set more than once.
2206     const MachineRegisterInfo &MRI = MF->getRegInfo();
2207     bool IsLiveIn = MRI.isLiveIn(Reg);
2208     if (!IsLiveIn)
2209        MBB.addLiveIn(Reg);
2210 
2211     if (CRSpilled && IsCRField) {
2212       CRMIB.addReg(Reg, RegState::ImplicitKill);
2213       continue;
2214     }
2215 
2216     // The actual spill will happen in the prologue.
2217     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2218       continue;
2219 
2220     // Insert the spill to the stack frame.
2221     if (IsCRField) {
2222       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2223       if (Subtarget.isPPC64()) {
2224         // The actual spill will happen at the start of the prologue.
2225         FuncInfo->addMustSaveCR(Reg);
2226       } else {
2227         CRSpilled = true;
2228         FuncInfo->setSpillsCR();
2229 
2230         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2231         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2232         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2233                   .addReg(Reg, RegState::ImplicitKill);
2234 
2235         MBB.insert(MI, CRMIB);
2236         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2237                                          .addReg(PPC::R12,
2238                                                  getKillRegState(true)),
2239                                          CSI[i].getFrameIdx()));
2240       }
2241     } else {
2242       if (CSI[i].isSpilledToReg()) {
2243         NumPESpillVSR++;
2244         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2245           .addReg(Reg, getKillRegState(true));
2246       } else {
2247         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2248         // Use !IsLiveIn for the kill flag.
2249         // We do not want to kill registers that are live in this function
2250         // before their use because they will become undefined registers.
2251         TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2252                                 CSI[i].getFrameIdx(), RC, TRI);
2253       }
2254     }
2255   }
2256   return true;
2257 }
2258 
2259 static void
restoreCRs(bool isPPC64,bool is31,bool CR2Spilled,bool CR3Spilled,bool CR4Spilled,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const std::vector<CalleeSavedInfo> & CSI,unsigned CSIIndex)2260 restoreCRs(bool isPPC64, bool is31,
2261            bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2262            MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2263            const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2264 
2265   MachineFunction *MF = MBB.getParent();
2266   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2267   DebugLoc DL;
2268   unsigned RestoreOp, MoveReg;
2269 
2270   if (isPPC64)
2271     // This is handled during epilogue generation.
2272     return;
2273   else {
2274     // 32-bit:  FP-relative
2275     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2276                                              PPC::R12),
2277                                      CSI[CSIIndex].getFrameIdx()));
2278     RestoreOp = PPC::MTOCRF;
2279     MoveReg = PPC::R12;
2280   }
2281 
2282   if (CR2Spilled)
2283     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2284                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2285 
2286   if (CR3Spilled)
2287     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2288                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2289 
2290   if (CR4Spilled)
2291     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2292                .addReg(MoveReg, getKillRegState(true)));
2293 }
2294 
2295 MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const2296 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2297                               MachineBasicBlock::iterator I) const {
2298   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2299   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2300       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2301     // Add (actually subtract) back the amount the callee popped on return.
2302     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2303       bool is64Bit = Subtarget.isPPC64();
2304       CalleeAmt *= -1;
2305       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2306       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2307       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2308       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2309       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2310       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2311       const DebugLoc &dl = I->getDebugLoc();
2312 
2313       if (isInt<16>(CalleeAmt)) {
2314         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2315           .addReg(StackReg, RegState::Kill)
2316           .addImm(CalleeAmt);
2317       } else {
2318         MachineBasicBlock::iterator MBBI = I;
2319         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2320           .addImm(CalleeAmt >> 16);
2321         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2322           .addReg(TmpReg, RegState::Kill)
2323           .addImm(CalleeAmt & 0xFFFF);
2324         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2325           .addReg(StackReg, RegState::Kill)
2326           .addReg(TmpReg);
2327       }
2328     }
2329   }
2330   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2331   return MBB.erase(I);
2332 }
2333 
2334 bool
restoreCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,std::vector<CalleeSavedInfo> & CSI,const TargetRegisterInfo * TRI) const2335 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2336                                         MachineBasicBlock::iterator MI,
2337                                         std::vector<CalleeSavedInfo> &CSI,
2338                                         const TargetRegisterInfo *TRI) const {
2339 
2340   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2341   // Return false otherwise to maintain pre-existing behavior.
2342   if (!Subtarget.isSVR4ABI())
2343     return false;
2344 
2345   MachineFunction *MF = MBB.getParent();
2346   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2347   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2348   bool MustSaveTOC = FI->mustSaveTOC();
2349   bool CR2Spilled = false;
2350   bool CR3Spilled = false;
2351   bool CR4Spilled = false;
2352   unsigned CSIIndex = 0;
2353 
2354   // Initialize insertion-point logic; we will be restoring in reverse
2355   // order of spill.
2356   MachineBasicBlock::iterator I = MI, BeforeI = I;
2357   bool AtStart = I == MBB.begin();
2358 
2359   if (!AtStart)
2360     --BeforeI;
2361 
2362   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2363     unsigned Reg = CSI[i].getReg();
2364 
2365     // Only Darwin actually uses the VRSAVE register, but it can still appear
2366     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2367     // Darwin, ignore it.
2368     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2369       continue;
2370 
2371     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2372       continue;
2373 
2374     if (Reg == PPC::CR2) {
2375       CR2Spilled = true;
2376       // The spill slot is associated only with CR2, which is the
2377       // first nonvolatile spilled.  Save it here.
2378       CSIIndex = i;
2379       continue;
2380     } else if (Reg == PPC::CR3) {
2381       CR3Spilled = true;
2382       continue;
2383     } else if (Reg == PPC::CR4) {
2384       CR4Spilled = true;
2385       continue;
2386     } else {
2387       // When we first encounter a non-CR register after seeing at
2388       // least one CR register, restore all spilled CRs together.
2389       if ((CR2Spilled || CR3Spilled || CR4Spilled)
2390           && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2391         bool is31 = needsFP(*MF);
2392         restoreCRs(Subtarget.isPPC64(), is31,
2393                    CR2Spilled, CR3Spilled, CR4Spilled,
2394                    MBB, I, CSI, CSIIndex);
2395         CR2Spilled = CR3Spilled = CR4Spilled = false;
2396       }
2397 
2398       if (CSI[i].isSpilledToReg()) {
2399         DebugLoc DL;
2400         NumPEReloadVSR++;
2401         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2402             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2403       } else {
2404        // Default behavior for non-CR saves.
2405         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2406         TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2407         assert(I != MBB.begin() &&
2408                "loadRegFromStackSlot didn't insert any code!");
2409       }
2410     }
2411 
2412     // Insert in reverse order.
2413     if (AtStart)
2414       I = MBB.begin();
2415     else {
2416       I = BeforeI;
2417       ++I;
2418     }
2419   }
2420 
2421   // If we haven't yet spilled the CRs, do so now.
2422   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2423     bool is31 = needsFP(*MF);
2424     restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2425                MBB, I, CSI, CSIIndex);
2426   }
2427 
2428   return true;
2429 }
2430 
enableShrinkWrapping(const MachineFunction & MF) const2431 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2432   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2433     return false;
2434   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2435           MF.getSubtarget<PPCSubtarget>().isPPC64());
2436 }
2437