1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/PPCPredicates.h"
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
29 
30 using namespace llvm;
31 
// Debug category name for this file, followed by the pass statistics it
// declares.
#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

// Hidden command-line flag "-ppc-enable-pe-vector-spills"; it is off by
// default.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
41 
42 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
43   if (STI.isAIXABI())
44     return STI.isPPC64() ? 16 : 8;
45   // SVR4 ABI:
46   return STI.isPPC64() ? 16 : 4;
47 }
48 
49 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 40 : 20;
52   return STI.isELFv2ABI() ? 24 : 40;
53 }
54 
55 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
56   // First slot in the general register save area.
57   return STI.isPPC64() ? -8U : -4U;
58 }
59 
60 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
61   if (STI.isAIXABI() || STI.isPPC64())
62     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
63 
64   // 32-bit SVR4 ABI:
65   return 8;
66 }
67 
68 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
69   // Third slot in the general purpose register save area.
70   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
71     return -12U;
72 
73   // Second slot in the general purpose register save area.
74   return STI.isPPC64() ? -16U : -8U;
75 }
76 
77 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
78   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
79 }
80 
/// Construct the frame lowering for the given subtarget. The stack grows down
/// and uses the subtarget's platform stack alignment; the third base-class
/// argument is 0. All ABI-dependent save-slot offsets and the linkage size are
/// computed once here from the subtarget and cached in members.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
90 
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
//
// Returns a pointer to a static table of {register, offset} spill slots chosen
// by the subtarget's ABI (64-bit ELF, 32-bit ELF, 64-bit AIX or 32-bit AIX)
// and stores the number of table entries in NumEntries.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// The helper macros below expand to comma-separated initializer lists so that
// the same register groups can be shared by several of the tables.

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  // Table for the 32-bit ELF ABI.
  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset.  We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  // Table for the 64-bit ELF ABI.
  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  // Table for 32-bit AIX.
  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  // Table for 64-bit AIX.
  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  // Pick the table that matches the subtarget's ABI and report its length.
  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  // Anything that is not ELF must be AIX.
  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}
250 
251 static bool spillsCR(const MachineFunction &MF) {
252   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
253   return FuncInfo->isCRSpilled();
254 }
255 
256 static bool hasSpills(const MachineFunction &MF) {
257   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
258   return FuncInfo->hasSpills();
259 }
260 
261 static bool hasNonRISpills(const MachineFunction &MF) {
262   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
263   return FuncInfo->hasNonRISpills();
264 }
265 
266 /// MustSaveLR - Return true if this function requires that we save the LR
267 /// register onto the stack in the prolog and restore it in the epilog of the
268 /// function.
269 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
270   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
271 
272   // We need a save/restore of LR if there is any def of LR (which is
273   // defined by calls, including the PIC setup sequence), or if there is
274   // some use of the LR stack slot (e.g. for builtin_return_address).
275   // (LR comes in 32 and 64 bit versions.)
276   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
277   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
278 }
279 
280 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
281 /// call frame size. Update the MachineFunction object with the stack size.
282 unsigned
283 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
284                                                 bool UseEstimate) const {
285   unsigned NewMaxCallFrameSize = 0;
286   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
287                                             &NewMaxCallFrameSize);
288   MF.getFrameInfo().setStackSize(FrameSize);
289   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
290   return FrameSize;
291 }
292 
293 /// determineFrameLayout - Determine the size of the frame and maximum call
294 /// frame size.
295 unsigned
296 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
297                                        bool UseEstimate,
298                                        unsigned *NewMaxCallFrameSize) const {
299   const MachineFrameInfo &MFI = MF.getFrameInfo();
300   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
301 
302   // Get the number of bytes to allocate from the FrameInfo
303   unsigned FrameSize =
304     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
305 
306   // Get stack alignments. The frame must be aligned to the greatest of these:
307   Align TargetAlign = getStackAlign(); // alignment required per the ABI
308   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
309   Align Alignment = std::max(TargetAlign, MaxAlign);
310 
311   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
312 
313   unsigned LR = RegInfo->getRARegister();
314   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
315   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
316                        !MFI.adjustsStack() &&       // No calls.
317                        !MustSaveLR(MF, LR) &&       // No need to save LR.
318                        !FI->mustSaveTOC() &&        // No need to save TOC.
319                        !RegInfo->hasBasePointer(MF); // No special alignment.
320 
321   // Note: for PPC32 SVR4ABI, we can still generate stackless
322   // code if all local vars are reg-allocated.
323   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
324 
325   // Check whether we can skip adjusting the stack pointer (by using red zone)
326   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
327     // No need for frame
328     return 0;
329   }
330 
331   // Get the maximum call frame size of all the calls.
332   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
333 
334   // Maximum call frame needs to be at least big enough for linkage area.
335   unsigned minCallFrameSize = getLinkageSize();
336   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
337 
338   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
339   // that allocations will be aligned.
340   if (MFI.hasVarSizedObjects())
341     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
342 
343   // Update the new max call frame size if the caller passes in a valid pointer.
344   if (NewMaxCallFrameSize)
345     *NewMaxCallFrameSize = maxCallFrameSize;
346 
347   // Include call frame size in total.
348   FrameSize += maxCallFrameSize;
349 
350   // Make sure the frame is aligned.
351   FrameSize = alignTo(FrameSize, Alignment);
352 
353   return FrameSize;
354 }
355 
356 // hasFP - Return true if the specified function actually has a dedicated frame
357 // pointer register.
358 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
359   const MachineFrameInfo &MFI = MF.getFrameInfo();
360   // FIXME: This is pretty much broken by design: hasFP() might be called really
361   // early, before the stack layout was calculated and thus hasFP() might return
362   // true or false here depending on the time of call.
363   return (MFI.getStackSize()) && needsFP(MF);
364 }
365 
366 // needsFP - Return true if the specified function should have a dedicated frame
367 // pointer register.  This is true if the function has variable sized allocas or
368 // if frame pointer elimination is disabled.
369 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
370   const MachineFrameInfo &MFI = MF.getFrameInfo();
371 
372   // Naked functions have no stack frame pushed, so we don't have a frame
373   // pointer.
374   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
375     return false;
376 
377   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
378          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
379          MF.exposesReturnsTwice() ||
380          (MF.getTarget().Options.GuaranteedTailCallOpt &&
381           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
382 }
383 
384 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
385   bool is31 = needsFP(MF);
386   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
387   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
388 
389   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
390   bool HasBP = RegInfo->hasBasePointer(MF);
391   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
392   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
393 
394   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
395        BI != BE; ++BI)
396     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
397       --MBBI;
398       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
399         MachineOperand &MO = MBBI->getOperand(I);
400         if (!MO.isReg())
401           continue;
402 
403         switch (MO.getReg()) {
404         case PPC::FP:
405           MO.setReg(FPReg);
406           break;
407         case PPC::FP8:
408           MO.setReg(FP8Reg);
409           break;
410         case PPC::BP:
411           MO.setReg(BPReg);
412           break;
413         case PPC::BP8:
414           MO.setReg(BP8Reg);
415           break;
416 
417         }
418       }
419     }
420 }
421 
/*  This function will do the following:
    - Set SR1 and SR2 (when non-null) to the defaults R0 and R12 (X0 and X12
      on 64-bit targets).
    - If UseAtEnd is set and MBB is a return block, or UseAtEnd is not set and
      MBB is the function's entry block, keep those defaults and return true.
    - Otherwise, initialize the register scavenger for MBB (advancing it
      towards the end of the block when UseAtEnd is set) and look for
      available registers.
    - If the defaults (R0/R12) are both available, return true.
    - Otherwise pick from the available registers of the GPR class, excluding
      callee-saved registers:
      - SR1 is set to the first available register, or PPC::NoRegister if
        there is none.
      - SR2 is set to the next available register. If there is none, it is set
        to an invalid register when TwoUniqueRegsRequired is true, or to the
        same register as SR1 otherwise.
      - Return true if at least two registers (TwoUniqueRegsRequired) or at
        least one register (otherwise) were available, false if not.

    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
    is not set, this function will attempt to find two different registers, but
    still return true if only one register is available (and set SR1 == SR2).

    SR1 and SR2 are optional output pointers; SR2 may only be requested
    together with SR1.
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Only move the scavenger when the target is not the first instruction.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  // (The CSRegs list is terminated by a zero entry.)
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}
523 
524 // We need a scratch register for spilling LR and for spilling CR. By default,
525 // we use two scratch registers to hide latency. However, if only one scratch
526 // register is available, we can adjust for that by not overlapping the spill
527 // code. However, if we need to realign the stack (i.e. have a base pointer)
528 // and the stack frame is large, we need two scratch registers.
529 // Also, stack probe requires two scratch registers, one for old sp, one for
530 // large frame and large probe size.
531 bool
532 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
533   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
534   MachineFunction &MF = *(MBB->getParent());
535   bool HasBP = RegInfo->hasBasePointer(MF);
536   unsigned FrameSize = determineFrameLayout(MF);
537   int NegFrameSize = -FrameSize;
538   bool IsLargeFrame = !isInt<16>(NegFrameSize);
539   MachineFrameInfo &MFI = MF.getFrameInfo();
540   Align MaxAlign = MFI.getMaxAlign();
541   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
542   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
543 
544   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
545          TLI.hasInlineStackProbe(MF);
546 }
547 
548 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
549   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
550 
551   return findScratchRegister(TmpMBB, false,
552                              twoUniqueScratchRegsRequired(TmpMBB));
553 }
554 
555 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
556   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
557 
558   return findScratchRegister(TmpMBB, true);
559 }
560 
561 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
562   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
563   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
564 
565   // Abort if there is no register info or function info.
566   if (!RegInfo || !FI)
567     return false;
568 
569   // Only move the stack update on ELFv2 ABI and PPC64.
570   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
571     return false;
572 
573   // Check the frame size first and return false if it does not fit the
574   // requirements.
575   // We need a non-zero frame size as well as a frame that will fit in the red
576   // zone. This is because by moving the stack pointer update we are now storing
577   // to the red zone until the stack pointer is updated. If we get an interrupt
578   // inside the prologue but before the stack update we now have a number of
579   // stores to the red zone and those stores must all fit.
580   MachineFrameInfo &MFI = MF.getFrameInfo();
581   unsigned FrameSize = MFI.getStackSize();
582   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
583     return false;
584 
585   // Frame pointers and base pointers complicate matters so don't do anything
586   // if we have them. For example having a frame pointer will sometimes require
587   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
588   // difficult. Similar situation exists with setjmp.
589   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
590     return false;
591 
592   // Calls to fast_cc functions use different rules for passing parameters on
593   // the stack from the ABI and using PIC base in the function imposes
594   // similar restrictions to using the base pointer. It is not generally safe
595   // to move the stack pointer update in these situations.
596   if (FI->hasFastCall() || FI->usesPICBase())
597     return false;
598 
599   // Finally we can move the stack update if we do not require register
600   // scavenging. Register scavenging can introduce more spills and so
601   // may make the frame size larger than we have computed.
602   return !RegInfo->requiresFrameIndexScavenging(MF);
603 }
604 
605 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
606                                     MachineBasicBlock &MBB) const {
607   MachineBasicBlock::iterator MBBI = MBB.begin();
608   MachineFrameInfo &MFI = MF.getFrameInfo();
609   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
610   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
611   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
612 
613   MachineModuleInfo &MMI = MF.getMMI();
614   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
615   DebugLoc dl;
616   // AIX assembler does not support cfi directives.
617   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
618 
619   // Get processor type.
620   bool isPPC64 = Subtarget.isPPC64();
621   // Get the ABI.
622   bool isSVR4ABI = Subtarget.isSVR4ABI();
623   bool isELFv2ABI = Subtarget.isELFv2ABI();
624   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
625 
626   // Work out frame sizes.
627   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
628   int NegFrameSize = -FrameSize;
629   if (!isInt<32>(NegFrameSize))
630     llvm_unreachable("Unhandled stack size!");
631 
632   if (MFI.isFrameAddressTaken())
633     replaceFPWithRealFP(MF);
634 
635   // Check if the link register (LR) must be saved.
636   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
637   bool MustSaveLR = FI->mustSaveLR();
638   bool MustSaveTOC = FI->mustSaveTOC();
639   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
640   bool MustSaveCR = !MustSaveCRs.empty();
641   // Do we have a frame pointer and/or base pointer for this function?
642   bool HasFP = hasFP(MF);
643   bool HasBP = RegInfo->hasBasePointer(MF);
644   bool HasRedZone = isPPC64 || !isSVR4ABI;
645 
646   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
647   Register BPReg = RegInfo->getBaseRegister(MF);
648   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
649   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
650   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
651   Register ScratchReg;
652   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
653   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
654   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
655                                                 : PPC::MFLR );
656   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
657                                                  : PPC::STW );
658   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
659                                                      : PPC::STWU );
660   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
661                                                         : PPC::STWUX);
662   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
663                                                           : PPC::LIS );
664   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
665                                                  : PPC::ORI );
666   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
667                                               : PPC::OR );
668   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
669                                                             : PPC::SUBFC);
670   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
671                                                                : PPC::SUBFIC);
672   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
673                                                            : PPC::MFCR);
674   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
675 
676   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
677   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
678   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
679   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
680   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
681          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
682 
683   // Using the same bool variable as below to suppress compiler warnings.
684   bool SingleScratchReg = findScratchRegister(
685       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
686   assert(SingleScratchReg &&
687          "Required number of registers not available in this block");
688 
689   SingleScratchReg = ScratchReg == TempReg;
690 
691   int LROffset = getReturnSaveOffset();
692 
693   int FPOffset = 0;
694   if (HasFP) {
695     MachineFrameInfo &MFI = MF.getFrameInfo();
696     int FPIndex = FI->getFramePointerSaveIndex();
697     assert(FPIndex && "No Frame Pointer Save Slot!");
698     FPOffset = MFI.getObjectOffset(FPIndex);
699   }
700 
701   int BPOffset = 0;
702   if (HasBP) {
703     MachineFrameInfo &MFI = MF.getFrameInfo();
704     int BPIndex = FI->getBasePointerSaveIndex();
705     assert(BPIndex && "No Base Pointer Save Slot!");
706     BPOffset = MFI.getObjectOffset(BPIndex);
707   }
708 
709   int PBPOffset = 0;
710   if (FI->usesPICBase()) {
711     MachineFrameInfo &MFI = MF.getFrameInfo();
712     int PBPIndex = FI->getPICBasePointerSaveIndex();
713     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
714     PBPOffset = MFI.getObjectOffset(PBPIndex);
715   }
716 
717   // Get stack alignments.
718   Align MaxAlign = MFI.getMaxAlign();
719   if (HasBP && MaxAlign > 1)
720     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
721 
722   // Frames of 32KB & larger require special handling because they cannot be
723   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
724   bool isLargeFrame = !isInt<16>(NegFrameSize);
725 
726   // Check if we can move the stack update instruction (stdu) down the prologue
727   // past the callee saves. Hopefully this will avoid the situation where the
728   // saves are waiting for the update on the store with update to complete.
729   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
730   bool MovingStackUpdateDown = false;
731 
732   // Check if we can move the stack update.
733   if (stackUpdateCanBeMoved(MF)) {
734     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
735     for (CalleeSavedInfo CSI : Info) {
736       int FrIdx = CSI.getFrameIdx();
737       // If the frame index is not negative the callee saved info belongs to a
738       // stack object that is not a fixed stack object. We ignore non-fixed
739       // stack objects because we won't move the stack update pointer past them.
740       if (FrIdx >= 0)
741         continue;
742 
743       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
744         StackUpdateLoc++;
745         MovingStackUpdateDown = true;
746       } else {
747         // We need all of the Frame Indices to meet these conditions.
748         // If they do not, abort the whole operation.
749         StackUpdateLoc = MBBI;
750         MovingStackUpdateDown = false;
751         break;
752       }
753     }
754 
755     // If the operation was not aborted then update the object offset.
756     if (MovingStackUpdateDown) {
757       for (CalleeSavedInfo CSI : Info) {
758         int FrIdx = CSI.getFrameIdx();
759         if (FrIdx < 0)
760           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
761       }
762     }
763   }
764 
765   // Where in the prologue we move the CR fields depends on how many scratch
766   // registers we have, and if we need to save the link register or not. This
767   // lambda is to avoid duplicating the logic in 2 places.
768   auto BuildMoveFromCR = [&]() {
769     if (isELFv2ABI && MustSaveCRs.size() == 1) {
770     // In the ELFv2 ABI, we are not required to save all CR fields.
771     // If only one CR field is clobbered, it is more efficient to use
772     // mfocrf to selectively save just that field, because mfocrf has short
773     // latency compares to mfcr.
774       assert(isPPC64 && "V2 ABI is 64-bit only.");
775       MachineInstrBuilder MIB =
776           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
777       MIB.addReg(MustSaveCRs[0], RegState::Kill);
778     } else {
779       MachineInstrBuilder MIB =
780           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
781       for (unsigned CRfield : MustSaveCRs)
782         MIB.addReg(CRfield, RegState::ImplicitKill);
783     }
784   };
785 
786   // If we need to spill the CR and the LR but we don't have two separate
787   // registers available, we must spill them one at a time
788   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
789     BuildMoveFromCR();
790     BuildMI(MBB, MBBI, dl, StoreWordInst)
791         .addReg(TempReg, getKillRegState(true))
792         .addImm(CRSaveOffset)
793         .addReg(SPReg);
794   }
795 
796   if (MustSaveLR)
797     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
798 
799   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
800     BuildMoveFromCR();
801 
802   if (HasRedZone) {
803     if (HasFP)
804       BuildMI(MBB, MBBI, dl, StoreInst)
805         .addReg(FPReg)
806         .addImm(FPOffset)
807         .addReg(SPReg);
808     if (FI->usesPICBase())
809       BuildMI(MBB, MBBI, dl, StoreInst)
810         .addReg(PPC::R30)
811         .addImm(PBPOffset)
812         .addReg(SPReg);
813     if (HasBP)
814       BuildMI(MBB, MBBI, dl, StoreInst)
815         .addReg(BPReg)
816         .addImm(BPOffset)
817         .addReg(SPReg);
818   }
819 
820   if (MustSaveLR)
821     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
822       .addReg(ScratchReg, getKillRegState(true))
823       .addImm(LROffset)
824       .addReg(SPReg);
825 
826   if (MustSaveCR &&
827       !(SingleScratchReg && MustSaveLR)) {
828     assert(HasRedZone && "A red zone is always available on PPC64");
829     BuildMI(MBB, MBBI, dl, StoreWordInst)
830       .addReg(TempReg, getKillRegState(true))
831       .addImm(CRSaveOffset)
832       .addReg(SPReg);
833   }
834 
835   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
836   if (!FrameSize)
837     return;
838 
839   // Adjust stack pointer: r1 += NegFrameSize.
840   // If there is a preferred stack alignment, align R1 now
841 
842   if (HasBP && HasRedZone) {
843     // Save a copy of r1 as the base pointer.
844     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
845       .addReg(SPReg)
846       .addReg(SPReg);
847   }
848 
849   // Have we generated a STUX instruction to claim stack frame? If so,
850   // the negated frame size will be placed in ScratchReg.
851   bool HasSTUX = false;
852 
853   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
854   // pointer is always stored at SP, we will get a free probe due to an essential
855   // STU(X) instruction.
856   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
857     // To be consistent with other targets, a pseudo instruction is emitted and
858     // will be later expanded in `inlineStackProbe`.
859     BuildMI(MBB, MBBI, dl,
860             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
861                             : PPC::PROBED_STACKALLOC_32))
862         .addDef(TempReg)
863         .addDef(ScratchReg) // ScratchReg stores the old sp.
864         .addImm(NegFrameSize);
865     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
866     // update the ScratchReg to meet the assumption that ScratchReg contains
867     // the NegFrameSize. This solution is rather tricky.
868     if (!HasRedZone) {
869       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
870           .addReg(ScratchReg)
871           .addReg(SPReg);
872       HasSTUX = true;
873     }
874   } else {
875     // This condition must be kept in sync with canUseAsPrologue.
876     if (HasBP && MaxAlign > 1) {
877       if (isPPC64)
878         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
879             .addReg(SPReg)
880             .addImm(0)
881             .addImm(64 - Log2(MaxAlign));
882       else // PPC32...
883         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
884             .addReg(SPReg)
885             .addImm(0)
886             .addImm(32 - Log2(MaxAlign))
887             .addImm(31);
888       if (!isLargeFrame) {
889         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
890             .addReg(ScratchReg, RegState::Kill)
891             .addImm(NegFrameSize);
892       } else {
893         assert(!SingleScratchReg && "Only a single scratch reg available");
894         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
895             .addImm(NegFrameSize >> 16);
896         BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
897             .addReg(TempReg, RegState::Kill)
898             .addImm(NegFrameSize & 0xFFFF);
899         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
900             .addReg(ScratchReg, RegState::Kill)
901             .addReg(TempReg, RegState::Kill);
902       }
903 
904       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
905           .addReg(SPReg, RegState::Kill)
906           .addReg(SPReg)
907           .addReg(ScratchReg);
908       HasSTUX = true;
909 
910     } else if (!isLargeFrame) {
911       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
912           .addReg(SPReg)
913           .addImm(NegFrameSize)
914           .addReg(SPReg);
915 
916     } else {
917       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
918           .addImm(NegFrameSize >> 16);
919       BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
920           .addReg(ScratchReg, RegState::Kill)
921           .addImm(NegFrameSize & 0xFFFF);
922       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
923           .addReg(SPReg, RegState::Kill)
924           .addReg(SPReg)
925           .addReg(ScratchReg);
926       HasSTUX = true;
927     }
928   }
929 
930   // Save the TOC register after the stack pointer update if a prologue TOC
931   // save is required for the function.
932   if (MustSaveTOC) {
933     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
934     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
935       .addReg(TOCReg, getKillRegState(true))
936       .addImm(TOCSaveOffset)
937       .addReg(SPReg);
938   }
939 
940   if (!HasRedZone) {
941     assert(!isPPC64 && "A red zone is always available on PPC64");
942     if (HasSTUX) {
943       // The negated frame size is in ScratchReg, and the SPReg has been
944       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
945       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
946       // the stack frame (i.e. the old SP), ideally, we would put the old
947       // SP into a register and use it as the base for the stores. The
948       // problem is that the only available register may be ScratchReg,
949       // which could be R0, and R0 cannot be used as a base address.
950 
951       // First, set ScratchReg to the old SP. This may need to be modified
952       // later.
953       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
954         .addReg(ScratchReg, RegState::Kill)
955         .addReg(SPReg);
956 
957       if (ScratchReg == PPC::R0) {
958         // R0 cannot be used as a base register, but it can be used as an
959         // index in a store-indexed.
960         int LastOffset = 0;
961         if (HasFP)  {
962           // R0 += (FPOffset-LastOffset).
963           // Need addic, since addi treats R0 as 0.
964           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
965             .addReg(ScratchReg)
966             .addImm(FPOffset-LastOffset);
967           LastOffset = FPOffset;
968           // Store FP into *R0.
969           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
970             .addReg(FPReg, RegState::Kill)  // Save FP.
971             .addReg(PPC::ZERO)
972             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
973         }
974         if (FI->usesPICBase()) {
975           // R0 += (PBPOffset-LastOffset).
976           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
977             .addReg(ScratchReg)
978             .addImm(PBPOffset-LastOffset);
979           LastOffset = PBPOffset;
980           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
981             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
982             .addReg(PPC::ZERO)
983             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
984         }
985         if (HasBP) {
986           // R0 += (BPOffset-LastOffset).
987           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
988             .addReg(ScratchReg)
989             .addImm(BPOffset-LastOffset);
990           LastOffset = BPOffset;
991           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
992             .addReg(BPReg, RegState::Kill)  // Save BP.
993             .addReg(PPC::ZERO)
994             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
995           // BP = R0-LastOffset
996           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
997             .addReg(ScratchReg, RegState::Kill)
998             .addImm(-LastOffset);
999         }
1000       } else {
1001         // ScratchReg is not R0, so use it as the base register. It is
1002         // already set to the old SP, so we can use the offsets directly.
1003 
1004         // Now that the stack frame has been allocated, save all the necessary
1005         // registers using ScratchReg as the base address.
1006         if (HasFP)
1007           BuildMI(MBB, MBBI, dl, StoreInst)
1008             .addReg(FPReg)
1009             .addImm(FPOffset)
1010             .addReg(ScratchReg);
1011         if (FI->usesPICBase())
1012           BuildMI(MBB, MBBI, dl, StoreInst)
1013             .addReg(PPC::R30)
1014             .addImm(PBPOffset)
1015             .addReg(ScratchReg);
1016         if (HasBP) {
1017           BuildMI(MBB, MBBI, dl, StoreInst)
1018             .addReg(BPReg)
1019             .addImm(BPOffset)
1020             .addReg(ScratchReg);
1021           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1022             .addReg(ScratchReg, RegState::Kill)
1023             .addReg(ScratchReg);
1024         }
1025       }
1026     } else {
1027       // The frame size is a known 16-bit constant (fitting in the immediate
1028       // field of STWU). To be here we have to be compiling for PPC32.
1029       // Since the SPReg has been decreased by FrameSize, add it back to each
1030       // offset.
1031       if (HasFP)
1032         BuildMI(MBB, MBBI, dl, StoreInst)
1033           .addReg(FPReg)
1034           .addImm(FrameSize + FPOffset)
1035           .addReg(SPReg);
1036       if (FI->usesPICBase())
1037         BuildMI(MBB, MBBI, dl, StoreInst)
1038           .addReg(PPC::R30)
1039           .addImm(FrameSize + PBPOffset)
1040           .addReg(SPReg);
1041       if (HasBP) {
1042         BuildMI(MBB, MBBI, dl, StoreInst)
1043           .addReg(BPReg)
1044           .addImm(FrameSize + BPOffset)
1045           .addReg(SPReg);
1046         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1047           .addReg(SPReg)
1048           .addImm(FrameSize);
1049       }
1050     }
1051   }
1052 
1053   // Add Call Frame Information for the instructions we generated above.
1054   if (needsCFI) {
1055     unsigned CFIIndex;
1056 
1057     if (HasBP) {
1058       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1059       // because if the stack needed aligning then CFA won't be at a fixed
1060       // offset from FP/SP.
1061       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1062       CFIIndex = MF.addFrameInst(
1063           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1064     } else {
1065       // Adjust the definition of CFA to account for the change in SP.
1066       assert(NegFrameSize);
1067       CFIIndex = MF.addFrameInst(
1068           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1069     }
1070     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1071         .addCFIIndex(CFIIndex);
1072 
1073     if (HasFP) {
1074       // Describe where FP was saved, at a fixed offset from CFA.
1075       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1076       CFIIndex = MF.addFrameInst(
1077           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1078       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1079           .addCFIIndex(CFIIndex);
1080     }
1081 
1082     if (FI->usesPICBase()) {
1083       // Describe where R30 (PIC base) was saved, at a fixed offset from CFA.
1084       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1085       CFIIndex = MF.addFrameInst(
1086           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1087       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1088           .addCFIIndex(CFIIndex);
1089     }
1090 
1091     if (HasBP) {
1092       // Describe where BP was saved, at a fixed offset from CFA.
1093       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1094       CFIIndex = MF.addFrameInst(
1095           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1096       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1097           .addCFIIndex(CFIIndex);
1098     }
1099 
1100     if (MustSaveLR) {
1101       // Describe where LR was saved, at a fixed offset from CFA.
1102       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1103       CFIIndex = MF.addFrameInst(
1104           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1105       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1106           .addCFIIndex(CFIIndex);
1107     }
1108   }
1109 
1110   // If there is a frame pointer, copy R1 into R31
1111   if (HasFP) {
1112     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1113       .addReg(SPReg)
1114       .addReg(SPReg);
1115 
1116     if (!HasBP && needsCFI) {
1117       // Change the definition of CFA from SP+offset to FP+offset, because SP
1118       // will change at every alloca.
1119       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1120       unsigned CFIIndex = MF.addFrameInst(
1121           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1122 
1123       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1124           .addCFIIndex(CFIIndex);
1125     }
1126   }
1127 
1128   if (needsCFI) {
1129     // Describe where callee saved registers were saved, at fixed offsets from
1130     // CFA.
1131     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1132     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1133       unsigned Reg = CSI[I].getReg();
1134       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1135 
1136       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1137       // subregisters of CR2. We just need to emit a move of CR2.
1138       if (PPC::CRBITRCRegClass.contains(Reg))
1139         continue;
1140 
1141       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1142         continue;
1143 
1144       // For SVR4, don't emit a move for the CR spill slot if we haven't
1145       // spilled CRs.
1146       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1147           && !MustSaveCR)
1148         continue;
1149 
1150       // For 64-bit SVR4 when we have spilled CRs, the spill location
1151       // is SP+8, not a frame-relative slot.
1152       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1153         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1154         // the whole CR word.  In the ELFv2 ABI, every CR that was
1155         // actually saved gets its own CFI record.
1156         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1157         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1158             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1159         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1160             .addCFIIndex(CFIIndex);
1161         continue;
1162       }
1163 
1164       if (CSI[I].isSpilledToReg()) {
1165         unsigned SpilledReg = CSI[I].getDstReg();
1166         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1167             nullptr, MRI->getDwarfRegNum(Reg, true),
1168             MRI->getDwarfRegNum(SpilledReg, true)));
1169         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1170           .addCFIIndex(CFIRegister);
1171       } else {
1172         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1173         // We have changed the object offset above but we do not want to change
1174         // the actual offsets in the CFI instruction so we have to undo the
1175         // offset change here.
1176         if (MovingStackUpdateDown)
1177           Offset -= NegFrameSize;
1178 
1179         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1180             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1181         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1182             .addCFIIndex(CFIIndex);
1183       }
1184     }
1185   }
1186 }
1187 
/// Expand the PROBED_STACKALLOC_64 / PROBED_STACKALLOC_32 pseudo instruction
/// found in \p PrologMBB into real store-with-update instructions that probe
/// the stack while moving the stack pointer.
///
/// If the block holds no such pseudo the function returns without touching
/// anything. Otherwise the pseudo's operands are read as: operand 0 = a
/// scratch register, operand 1 = the register that carries the old stack
/// pointer (or, on the realignment path, first the final stack pointer),
/// operand 2 = the negated frame size. The pseudo is erased at the end and the
/// NumPrologProbed statistic is incremented.
///
/// Two strategies are used:
///  - HasBP && MaxAlign > 1: the final SP depends on the run-time SP value, so
///    a compare-and-branch probe loop is emitted (see probeRealignedStack).
///  - otherwise: the residual (NegFrameSize % NegProbeSize) is allocated
///    first, followed by NumBlocks probe-sized allocations, emitted either
///    unrolled (fewer than 3 blocks) or as a CTR-counted loop.
///
/// Both loop forms insert new machine basic blocks right after the block
/// being expanded and move the rest of that block into the new exit block.
1188 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1189                                         MachineBasicBlock &PrologMBB) const {
1190   bool isPPC64 = Subtarget.isPPC64();
1191   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1192   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1193   MachineFrameInfo &MFI = MF.getFrameInfo();
1194   MachineModuleInfo &MMI = MF.getMMI();
1195   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1196   // AIX assembler does not support cfi directives.
1197   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
       // Find the pseudo to expand; there is nothing to do if it is absent.
1198   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1199     int Opc = MI.getOpcode();
1200     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1201   });
1202   if (StackAllocMIPos == PrologMBB.end())
1203     return;
1204   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1205   MachineBasicBlock *CurrentMBB = &PrologMBB;
1206   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1207   MachineInstr &MI = *StackAllocMIPos;
       // Sizes are kept negated because they are added to SP. With both values
       // negative, the quotient below is the number of whole probe-sized blocks
       // and the remainder is the (negative or zero) residual.
1208   int64_t NegFrameSize = MI.getOperand(2).getImm();
1209   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1210   int64_t NegProbeSize = -(int64_t)ProbeSize;
1211   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1212   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1213   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1214   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
       // Operand 0 of the pseudo is used as scratch; operand 1 is called FPReg
       // here and is the value stored as back chain by the probing stores on the
       // non-realigned path.
1215   Register ScratchReg = MI.getOperand(0).getReg();
1216   Register FPReg = MI.getOperand(1).getReg();
1217   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1218   bool HasBP = RegInfo->hasBasePointer(MF);
1219   Register BPReg = RegInfo->getBaseRegister(MF);
1220   Align MaxAlign = MFI.getMaxAlign();
       // A red zone is assumed on PPC64 and on every non-SVR4 ABI.
1221   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
       // Register-to-register copy is emitted as "or rD, rS, rS".
1222   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1223   // Subroutines to generate .cfi_* directives.
       // buildDefCFAReg: make Reg the register the CFA is computed from.
1224   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1225                             MachineBasicBlock::iterator MBBI, Register Reg) {
1226     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1227     unsigned CFIIndex = MF.addFrameInst(
1228         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1229     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1230         .addCFIIndex(CFIIndex);
1231   };
       // buildDefCFA: define the CFA from both a register and an offset.
1232   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1233                          MachineBasicBlock::iterator MBBI, Register Reg,
1234                          int Offset) {
1235     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1236     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1237         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1238     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1239         .addCFIIndex(CFIIndex);
1240   };
1241   // Subroutine to determine if we can use the Imm as part of d-form.
       // The immediate must fit in 16 signed bits and be a multiple of 4
       // (presumably because STDU is a DS-form instruction -- TODO confirm).
1242   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1243   // Subroutine to materialize the Imm into TempReg.
       // A single LI is used when Imm fits in 16 signed bits; otherwise LIS loads
       // the upper half and ORI merges in the low 16 bits.
1244   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1245                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1246                             Register &TempReg) {
1247     assert(isInt<32>(Imm) && "Unhandled imm");
1248     if (isInt<16>(Imm))
1249       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1250           .addImm(Imm);
1251     else {
1252       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1253           .addImm(Imm >> 16);
1254       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1255           .addReg(TempReg)
1256           .addImm(Imm & 0xFFFF);
1257     }
1258   };
1259   // Subroutine to store frame pointer and decrease stack pointer by probe size.
       // Emits exactly one store-with-update that defines SPReg and stores
       // StoreReg: the D-form (STDU/STWU) takes the displacement from the
       // immediate NegSize and ignores NegSizeReg; the X-form (STDUX/STWUX) takes
       // it from NegSizeReg and ignores NegSize.
1260   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1261                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1262                               Register NegSizeReg, bool UseDForm,
1263                               Register StoreReg) {
1264     if (UseDForm)
1265       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1266           .addReg(StoreReg)
1267           .addImm(NegSize)
1268           .addReg(SPReg);
1269     else
1270       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1271           .addReg(StoreReg)
1272           .addReg(SPReg)
1273           .addReg(NegSizeReg);
1274   };
1275   // Used to probe stack when realignment is required.
1276   // Note that, according to the ABI's requirement, *sp must always equal the
1277   // value of the back-chain pointer, so only st(w|d)u(x) can be used to update
1278   // sp. Following is pseudo code:
1279   // final_sp = (sp - sp % align) + negframesize;
1280   // neg_gap = final_sp - sp;
1281   // while (neg_gap < negprobesize) {
1282   //   stdu fp, negprobesize(sp);
1283   //   neg_gap -= negprobesize;
1284   // }
1285   // stdux fp, sp, neg_gap
1286   //
1287   // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
1288   // before probe code, we don't need to save it, so we get one additional reg
1289   // that can be used to materialize the probesize if needed to use xform.
1290   // Otherwise, we can NOT materialize probesize, so we can only use Dform for
1291   // now.
1292   //
1293   // The allocations are:
1294   // if (HasBP && HasRedzone) {
1295   //   r0: materialize the probesize if needed so that we can use xform.
1296   //   r12: `neg_gap`
1297   // } else {
1298   //   r0: back-chain pointer
1299   //   r12: `neg_gap`.
1300   // }
       //
       // Parameters: MBB/MBBI give the block and the position of the pseudo being
       // expanded; ScratchReg receives `neg_gap`; TempReg holds the final stack
       // pointer on entry. NOTE: the ScratchReg parameter shadows the outer
       // ScratchReg, and the only caller passes the outer FPReg as TempReg.
       // Returns the newly created exit block, which now contains everything
       // from MBBI to the end of MBB.
1301   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1302                                  MachineBasicBlock::iterator MBBI,
1303                                  Register ScratchReg, Register TempReg) {
1304     assert(HasBP && "The function is supposed to have base pointer when its "
1305                     "stack is realigned.");
1306     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1307 
1308     // FIXME: We can eliminate this limitation if we get more information about
1309     // which parts of the red zone are already used. Used red zone can be
1310     // treated as probed. But there might be `holes' in the probed red zone,
1311     // which could complicate the implementation.
1312     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1313            "Probe size should be larger or equal to the size of red-zone so "
1314            "that red-zone is not clobbered by probing.");
1315 
1316     Register &FinalStackPtr = TempReg;
1317     // FIXME: We only support NegProbeSize materializable by DForm currently.
1318     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1319     // register.
         // Clamp the probe step so it is no smaller than -32768 and therefore fits
         // a signed 16-bit displacement. NegProbeSize is captured by reference, so
         // this also changes the variable of the enclosing function.
1320     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1321     assert(isInt<16>(NegProbeSize) &&
1322            "NegProbeSize should be materializable by DForm");
1323     Register CRReg = PPC::CR0;
1324     // Layout of output assembly kinda like:
1325     // bb.0:
1326     //   ...
1327     //   sub $scratchreg, $finalsp, r1
1328     //   cmpdi $scratchreg, <negprobesize>
1329     //   bge bb.2
1330     // bb.1:
1331     //   stdu <backchain>, <negprobesize>(r1)
1332     //   sub $scratchreg, $scratchreg, negprobesize
1333     //   cmpdi $scratchreg, <negprobesize>
1334     //   blt bb.1
1335     // bb.2:
1336     //   stdux <backchain>, r1, $scratchreg
         // Both new blocks are inserted directly after MBB, loop body first.
1337     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1338     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1339     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1340     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1341     MF.insert(MBBInsertPoint, ProbeExitMBB);
1342     // bb.2
         // Built before bb.0 on purpose: the splice below removes everything from
         // MBBI onwards out of MBB, so the BuildMI(&MBB, ...) calls used for bb.0
         // then append right where the pseudo used to start.
1343     {
1344       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
           // Final X-form allocation of whatever gap is left in ScratchReg.
1345       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1346                        BackChainPointer);
1347       if (HasRedZone)
1348         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1349         // to TempReg to satisfy it.
1350         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1351             .addReg(BPReg)
1352             .addReg(BPReg);
1353       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1354       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1355     }
1356     // bb.0
1357     {
           // ScratchReg = FinalStackPtr - SP, i.e. the (negative) gap to allocate.
1358       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1359           .addReg(SPReg)
1360           .addReg(FinalStackPtr);
           // Without a red zone TempReg doubles as the back-chain value, so it is
           // overwritten with the current SP once the gap has been computed.
1361       if (!HasRedZone)
1362         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
           // Skip the loop when the gap is already within one probe step.
1363       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1364           .addReg(ScratchReg)
1365           .addImm(NegProbeSize);
1366       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1367           .addImm(PPC::PRED_GE)
1368           .addReg(CRReg)
1369           .addMBB(ProbeExitMBB);
1370       MBB.addSuccessor(ProbeLoopBodyMBB);
1371       MBB.addSuccessor(ProbeExitMBB);
1372     }
1373     // bb.1
1374     {
1375       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
           // One probe-sized D-form allocation per iteration; NegSizeReg is unused.
1376       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1377                        0, true /*UseDForm*/, BackChainPointer);
           // neg_gap -= negprobesize, then loop while more than one step remains.
1378       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1379               ScratchReg)
1380           .addReg(ScratchReg)
1381           .addImm(-NegProbeSize);
1382       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1383               CRReg)
1384           .addReg(ScratchReg)
1385           .addImm(NegProbeSize);
1386       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1387           .addImm(PPC::PRED_LT)
1388           .addReg(CRReg)
1389           .addMBB(ProbeLoopBodyMBB);
1390       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1391       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1392     }
1393     // Update liveins.
1394     recomputeLiveIns(*ProbeLoopBodyMBB);
1395     recomputeLiveIns(*ProbeExitMBB);
1396     return ProbeExitMBB;
1397   };
1398   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1399   // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
1400   // the offset subtracted from SP is determined by SP's runtime value.
1401   if (HasBP && MaxAlign > 1) {
1402     // Calculate final stack pointer.
         // Step 1: ScratchReg = SP % MaxAlign (the low Log2(MaxAlign) bits of SP).
1403     if (isPPC64)
1404       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1405           .addReg(SPReg)
1406           .addImm(0)
1407           .addImm(64 - Log2(MaxAlign));
1408     else
1409       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1410           .addReg(SPReg)
1411           .addImm(0)
1412           .addImm(32 - Log2(MaxAlign))
1413           .addImm(31);
         // Step 2: FPReg = SP - ScratchReg, i.e. SP rounded down to MaxAlign.
1414     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1415             FPReg)
1416         .addReg(ScratchReg)
1417         .addReg(SPReg);
         // Step 3: FPReg += NegFrameSize, giving the final stack pointer.
1418     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1419     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1420             FPReg)
1421         .addReg(ScratchReg)
1422         .addReg(FPReg);
         // From here on CurrentMBB is the exit block, which now holds MI.
1423     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
         // NOTE(review): at this point FPReg appears to hold the pre-allocation
         // SP again (copied in bb.2 or bb.0 above) -- confirm.
1424     if (needsCFI)
1425       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1426   } else {
1427     // Initialize current frame pointer.
1428     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1429     // Use FPReg to calculate CFA.
1430     if (needsCFI)
1431       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1432     // Probe residual part.
         // The residual is allocated first, with its size materialized into
         // ScratchReg when it cannot be encoded as a D-form displacement.
1433     if (NegResidualSize) {
1434       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1435       if (!ResidualUseDForm)
1436         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1437       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1438                        ResidualUseDForm, FPReg);
1439     }
1440     bool UseDForm = CanUseDForm(NegProbeSize);
1441     // If number of blocks is small, just probe them directly.
1442     if (NumBlocks < 3) {
           // Unrolled: the probe size is materialized once and reused.
1443       if (!UseDForm)
1444         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1445       for (int i = 0; i < NumBlocks; ++i)
1446         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1447                          FPReg);
1448       if (needsCFI) {
1449         // Restore using SPReg to calculate CFA.
1450         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1451       }
1452     } else {
1453       // Since CTR is a volatile register and current shrinkwrap implementation
1454       // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1455       // CTR loop to probe.
1456       // Calculate the trip count and store it in CTR.
1457       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1458       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1459           .addReg(ScratchReg, RegState::Kill);
           // ScratchReg is free again after MTCTR, so it can carry the probe size
           // for the X-form store inside the loop.
1460       if (!UseDForm)
1461         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1462       // Create MBBs of the loop.
1463       MachineFunction::iterator MBBInsertPoint =
1464           std::next(CurrentMBB->getIterator());
1465       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1466       MF.insert(MBBInsertPoint, LoopMBB);
1467       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1468       MF.insert(MBBInsertPoint, ExitMBB);
1469       // Synthesize the loop body.
1470       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1471                        UseDForm, FPReg);
1472       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1473           .addMBB(LoopMBB);
1474       LoopMBB->addSuccessor(ExitMBB);
1475       LoopMBB->addSuccessor(LoopMBB);
1476       // Synthesize the exit MBB.
           // Only the instructions after MI are moved; MI itself stays in
           // CurrentMBB until it is erased at the end of this function.
1477       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1478                       std::next(MachineBasicBlock::iterator(MI)),
1479                       CurrentMBB->end());
1480       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1481       CurrentMBB->addSuccessor(LoopMBB);
1482       if (needsCFI) {
1483         // Restore using SPReg to calculate CFA.
1484         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1485       }
1486       // Update liveins.
1487       recomputeLiveIns(*LoopMBB);
1488       recomputeLiveIns(*ExitMBB);
1489     }
1490   }
       // The pseudo has been fully expanded; count it and drop it.
1491   ++NumPrologProbed;
1492   MI.eraseFromParent();
1493 }
1494 
1495 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1496                                     MachineBasicBlock &MBB) const {
1497   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1498   DebugLoc dl;
1499 
1500   if (MBBI != MBB.end())
1501     dl = MBBI->getDebugLoc();
1502 
1503   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1504   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1505 
1506   // Get alignment info so we know how to restore the SP.
1507   const MachineFrameInfo &MFI = MF.getFrameInfo();
1508 
1509   // Get the number of bytes allocated from the FrameInfo.
1510   int FrameSize = MFI.getStackSize();
1511 
1512   // Get processor type.
1513   bool isPPC64 = Subtarget.isPPC64();
1514 
1515   // Check if the link register (LR) has been saved.
1516   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1517   bool MustSaveLR = FI->mustSaveLR();
1518   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1519   bool MustSaveCR = !MustSaveCRs.empty();
1520   // Do we have a frame pointer and/or base pointer for this function?
1521   bool HasFP = hasFP(MF);
1522   bool HasBP = RegInfo->hasBasePointer(MF);
1523   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1524 
1525   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1526   Register BPReg = RegInfo->getBaseRegister(MF);
1527   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1528   Register ScratchReg;
1529   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1530   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1531                                                  : PPC::MTLR );
1532   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1533                                                  : PPC::LWZ );
1534   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1535                                                            : PPC::LIS );
1536   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1537                                               : PPC::OR );
1538   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1539                                                   : PPC::ORI );
1540   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1541                                                    : PPC::ADDI );
1542   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1543                                                 : PPC::ADD4 );
1544   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1545                                                      : PPC::LWZ);
1546   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1547                                                      : PPC::MTOCRF);
1548   int LROffset = getReturnSaveOffset();
1549 
1550   int FPOffset = 0;
1551 
1552   // Using the same bool variable as below to suppress compiler warnings.
1553   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1554                                               &TempReg);
1555   assert(SingleScratchReg &&
1556          "Could not find an available scratch register");
1557 
1558   SingleScratchReg = ScratchReg == TempReg;
1559 
1560   if (HasFP) {
1561     int FPIndex = FI->getFramePointerSaveIndex();
1562     assert(FPIndex && "No Frame Pointer Save Slot!");
1563     FPOffset = MFI.getObjectOffset(FPIndex);
1564   }
1565 
1566   int BPOffset = 0;
1567   if (HasBP) {
1568       int BPIndex = FI->getBasePointerSaveIndex();
1569       assert(BPIndex && "No Base Pointer Save Slot!");
1570       BPOffset = MFI.getObjectOffset(BPIndex);
1571   }
1572 
1573   int PBPOffset = 0;
1574   if (FI->usesPICBase()) {
1575     int PBPIndex = FI->getPICBasePointerSaveIndex();
1576     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1577     PBPOffset = MFI.getObjectOffset(PBPIndex);
1578   }
1579 
1580   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1581 
1582   if (IsReturnBlock) {
1583     unsigned RetOpcode = MBBI->getOpcode();
1584     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1585                       RetOpcode == PPC::TCRETURNdi ||
1586                       RetOpcode == PPC::TCRETURNai ||
1587                       RetOpcode == PPC::TCRETURNri8 ||
1588                       RetOpcode == PPC::TCRETURNdi8 ||
1589                       RetOpcode == PPC::TCRETURNai8;
1590 
1591     if (UsesTCRet) {
1592       int MaxTCRetDelta = FI->getTailCallSPDelta();
1593       MachineOperand &StackAdjust = MBBI->getOperand(1);
1594       assert(StackAdjust.isImm() && "Expecting immediate value.");
1595       // Adjust stack pointer.
1596       int StackAdj = StackAdjust.getImm();
1597       int Delta = StackAdj - MaxTCRetDelta;
1598       assert((Delta >= 0) && "Delta must be positive");
1599       if (MaxTCRetDelta>0)
1600         FrameSize += (StackAdj +Delta);
1601       else
1602         FrameSize += StackAdj;
1603     }
1604   }
1605 
1606   // Frames of 32KB & larger require special handling because they cannot be
1607   // indexed into with a simple LD/LWZ immediate offset operand.
1608   bool isLargeFrame = !isInt<16>(FrameSize);
1609 
1610   // On targets without red zone, the SP needs to be restored last, so that
1611   // all live contents of the stack frame are upwards of the SP. This means
1612   // that we cannot restore SP just now, since there may be more registers
1613   // to restore from the stack frame (e.g. R31). If the frame size is not
1614   // a simple immediate value, we will need a spare register to hold the
1615   // restored SP. If the frame size is known and small, we can simply adjust
1616   // the offsets of the registers to be restored, and still use SP to restore
1617   // them. In such case, the final update of SP will be to add the frame
1618   // size to it.
1619   // To simplify the code, set RBReg to the base register used to restore
1620   // values from the stack, and set SPAdd to the value that needs to be added
1621   // to the SP at the end. The default values are as if red zone was present.
1622   unsigned RBReg = SPReg;
1623   unsigned SPAdd = 0;
1624 
1625   // Check if we can move the stack update instruction up the epilogue
1626   // past the callee saves. This will allow the move to LR instruction
1627   // to be executed before the restores of the callee saves which means
1628   // that the callee saves can hide the latency from the MTLR instrcution.
1629   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1630   if (stackUpdateCanBeMoved(MF)) {
1631     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1632     for (CalleeSavedInfo CSI : Info) {
1633       int FrIdx = CSI.getFrameIdx();
1634       // If the frame index is not negative the callee saved info belongs to a
1635       // stack object that is not a fixed stack object. We ignore non-fixed
1636       // stack objects because we won't move the update of the stack pointer
1637       // past them.
1638       if (FrIdx >= 0)
1639         continue;
1640 
1641       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1642         StackUpdateLoc--;
1643       else {
1644         // Abort the operation as we can't update all CSR restores.
1645         StackUpdateLoc = MBBI;
1646         break;
1647       }
1648     }
1649   }
1650 
1651   if (FrameSize) {
1652     // In the prologue, the loaded (or persistent) stack pointer value is
1653     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1654     // zone add this offset back now.
1655 
1656     // If the function has a base pointer, the stack pointer has been copied
1657     // to it so we can restore it by copying in the other direction.
1658     if (HasRedZone && HasBP) {
1659       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1660         addReg(BPReg).
1661         addReg(BPReg);
1662     }
1663     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1664     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1665     // call which invalidates the stack pointer value in SP(0). So we use the
1666     // value of R31 in this case. Similar situation exists with setjmp.
1667     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1668       assert(HasFP && "Expecting a valid frame pointer.");
1669       if (!HasRedZone)
1670         RBReg = FPReg;
1671       if (!isLargeFrame) {
1672         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1673           .addReg(FPReg).addImm(FrameSize);
1674       } else {
1675         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1676           .addImm(FrameSize >> 16);
1677         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1678           .addReg(ScratchReg, RegState::Kill)
1679           .addImm(FrameSize & 0xFFFF);
1680         BuildMI(MBB, MBBI, dl, AddInst)
1681           .addReg(RBReg)
1682           .addReg(FPReg)
1683           .addReg(ScratchReg);
1684       }
1685     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1686       if (HasRedZone) {
1687         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1688           .addReg(SPReg)
1689           .addImm(FrameSize);
1690       } else {
1691         // Make sure that adding FrameSize will not overflow the max offset
1692         // size.
1693         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1694                "Local offsets should be negative");
1695         SPAdd = FrameSize;
1696         FPOffset += FrameSize;
1697         BPOffset += FrameSize;
1698         PBPOffset += FrameSize;
1699       }
1700     } else {
1701       // We don't want to use ScratchReg as a base register, because it
1702       // could happen to be R0. Use FP instead, but make sure to preserve it.
1703       if (!HasRedZone) {
1704         // If FP is not saved, copy it to ScratchReg.
1705         if (!HasFP)
1706           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1707             .addReg(FPReg)
1708             .addReg(FPReg);
1709         RBReg = FPReg;
1710       }
1711       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1712         .addImm(0)
1713         .addReg(SPReg);
1714     }
1715   }
1716   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1717   // If there is no red zone, ScratchReg may be needed for holding a useful
1718   // value (although not the base register). Make sure it is not overwritten
1719   // too early.
1720 
1721   // If we need to restore both the LR and the CR and we only have one
1722   // available scratch register, we must do them one at a time.
1723   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1724     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1725     // is live here.
1726     assert(HasRedZone && "Expecting red zone");
1727     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1728       .addImm(CRSaveOffset)
1729       .addReg(SPReg);
1730     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1731       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1732         .addReg(TempReg, getKillRegState(i == e-1));
1733   }
1734 
1735   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1736   // LR is stored in the caller's stack frame. ScratchReg will be needed
1737   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1738   // a base register anyway, because it may happen to be R0.
1739   bool LoadedLR = false;
1740   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1741     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1742       .addImm(LROffset+SPAdd)
1743       .addReg(RBReg);
1744     LoadedLR = true;
1745   }
1746 
1747   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1748     assert(RBReg == SPReg && "Should be using SP as a base register");
1749     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1750       .addImm(CRSaveOffset)
1751       .addReg(RBReg);
1752   }
1753 
1754   if (HasFP) {
1755     // If there is red zone, restore FP directly, since SP has already been
1756     // restored. Otherwise, restore the value of FP into ScratchReg.
1757     if (HasRedZone || RBReg == SPReg)
1758       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1759         .addImm(FPOffset)
1760         .addReg(SPReg);
1761     else
1762       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1763         .addImm(FPOffset)
1764         .addReg(RBReg);
1765   }
1766 
1767   if (FI->usesPICBase())
1768     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1769       .addImm(PBPOffset)
1770       .addReg(RBReg);
1771 
1772   if (HasBP)
1773     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1774       .addImm(BPOffset)
1775       .addReg(RBReg);
1776 
1777   // There is nothing more to be loaded from the stack, so now we can
1778   // restore SP: SP = RBReg + SPAdd.
1779   if (RBReg != SPReg || SPAdd != 0) {
1780     assert(!HasRedZone && "This should not happen with red zone");
1781     // If SPAdd is 0, generate a copy.
1782     if (SPAdd == 0)
1783       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1784         .addReg(RBReg)
1785         .addReg(RBReg);
1786     else
1787       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1788         .addReg(RBReg)
1789         .addImm(SPAdd);
1790 
1791     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1792     if (RBReg == FPReg)
1793       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1794         .addReg(ScratchReg)
1795         .addReg(ScratchReg);
1796 
1797     // Now load the LR from the caller's stack frame.
1798     if (MustSaveLR && !LoadedLR)
1799       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1800         .addImm(LROffset)
1801         .addReg(SPReg);
1802   }
1803 
1804   if (MustSaveCR &&
1805       !(SingleScratchReg && MustSaveLR))
1806     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1807       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1808         .addReg(TempReg, getKillRegState(i == e-1));
1809 
1810   if (MustSaveLR)
1811     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1812 
1813   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1814   // call optimization
1815   if (IsReturnBlock) {
1816     unsigned RetOpcode = MBBI->getOpcode();
1817     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1818         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1819         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1820       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1821       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1822 
1823       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1824         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1825           .addReg(SPReg).addImm(CallerAllocatedAmt);
1826       } else {
1827         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1828           .addImm(CallerAllocatedAmt >> 16);
1829         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1830           .addReg(ScratchReg, RegState::Kill)
1831           .addImm(CallerAllocatedAmt & 0xFFFF);
1832         BuildMI(MBB, MBBI, dl, AddInst)
1833           .addReg(SPReg)
1834           .addReg(FPReg)
1835           .addReg(ScratchReg);
1836       }
1837     } else {
1838       createTailCallBranchInstr(MBB);
1839     }
1840   }
1841 }
1842 
1843 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1844   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1845 
1846   // If we got this far a first terminator should exist.
1847   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1848 
1849   DebugLoc dl = MBBI->getDebugLoc();
1850   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1851 
1852   // Create branch instruction for pseudo tail call return instruction.
1853   // The TCRETURNdi variants are direct calls. Valid targets for those are
1854   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1855   // since we can tail call external functions with PC-Rel (i.e. we don't need
1856   // to worry about different TOC pointers). Some of the external functions will
1857   // be MO_GlobalAddress while others like memcpy for example, are going to
1858   // be MO_ExternalSymbol.
1859   unsigned RetOpcode = MBBI->getOpcode();
1860   if (RetOpcode == PPC::TCRETURNdi) {
1861     MBBI = MBB.getLastNonDebugInstr();
1862     MachineOperand &JumpTarget = MBBI->getOperand(0);
1863     if (JumpTarget.isGlobal())
1864       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1865         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1866     else if (JumpTarget.isSymbol())
1867       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1868         addExternalSymbol(JumpTarget.getSymbolName());
1869     else
1870       llvm_unreachable("Expecting Global or External Symbol");
1871   } else if (RetOpcode == PPC::TCRETURNri) {
1872     MBBI = MBB.getLastNonDebugInstr();
1873     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1874     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1875   } else if (RetOpcode == PPC::TCRETURNai) {
1876     MBBI = MBB.getLastNonDebugInstr();
1877     MachineOperand &JumpTarget = MBBI->getOperand(0);
1878     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1879   } else if (RetOpcode == PPC::TCRETURNdi8) {
1880     MBBI = MBB.getLastNonDebugInstr();
1881     MachineOperand &JumpTarget = MBBI->getOperand(0);
1882     if (JumpTarget.isGlobal())
1883       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1884         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1885     else if (JumpTarget.isSymbol())
1886       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1887         addExternalSymbol(JumpTarget.getSymbolName());
1888     else
1889       llvm_unreachable("Expecting Global or External Symbol");
1890   } else if (RetOpcode == PPC::TCRETURNri8) {
1891     MBBI = MBB.getLastNonDebugInstr();
1892     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1893     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1894   } else if (RetOpcode == PPC::TCRETURNai8) {
1895     MBBI = MBB.getLastNonDebugInstr();
1896     MachineOperand &JumpTarget = MBBI->getOperand(0);
1897     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1898   }
1899 }
1900 
1901 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1902                                             BitVector &SavedRegs,
1903                                             RegScavenger *RS) const {
1904   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1905 
1906   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1907 
1908   //  Save and clear the LR state.
1909   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1910   unsigned LR = RegInfo->getRARegister();
1911   FI->setMustSaveLR(MustSaveLR(MF, LR));
1912   SavedRegs.reset(LR);
1913 
1914   //  Save R31 if necessary
1915   int FPSI = FI->getFramePointerSaveIndex();
1916   const bool isPPC64 = Subtarget.isPPC64();
1917   MachineFrameInfo &MFI = MF.getFrameInfo();
1918 
1919   // If the frame pointer save index hasn't been defined yet.
1920   if (!FPSI && needsFP(MF)) {
1921     // Find out what the fix offset of the frame pointer save area.
1922     int FPOffset = getFramePointerSaveOffset();
1923     // Allocate the frame index for frame pointer save area.
1924     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1925     // Save the result.
1926     FI->setFramePointerSaveIndex(FPSI);
1927   }
1928 
1929   int BPSI = FI->getBasePointerSaveIndex();
1930   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1931     int BPOffset = getBasePointerSaveOffset();
1932     // Allocate the frame index for the base pointer save area.
1933     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1934     // Save the result.
1935     FI->setBasePointerSaveIndex(BPSI);
1936   }
1937 
1938   // Reserve stack space for the PIC Base register (R30).
1939   // Only used in SVR4 32-bit.
1940   if (FI->usesPICBase()) {
1941     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1942     FI->setPICBasePointerSaveIndex(PBPSI);
1943   }
1944 
1945   // Make sure we don't explicitly spill r31, because, for example, we have
1946   // some inline asm which explicitly clobbers it, when we otherwise have a
1947   // frame pointer and are using r31's spill slot for the prologue/epilogue
1948   // code. Same goes for the base pointer and the PIC base register.
1949   if (needsFP(MF))
1950     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1951   if (RegInfo->hasBasePointer(MF))
1952     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1953   if (FI->usesPICBase())
1954     SavedRegs.reset(PPC::R30);
1955 
1956   // Reserve stack space to move the linkage area to in case of a tail call.
1957   int TCSPDelta = 0;
1958   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1959       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1960     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1961   }
1962 
1963   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1964   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1965   // object at the offset of the CR-save slot in the linkage area. The actual
1966   // save and restore of the condition register will be created as part of the
1967   // prologue and epilogue insertion, but the FixedStack object is needed to
1968   // keep the CalleSavedInfo valid.
1969   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1970        SavedRegs.test(PPC::CR4))) {
1971     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1972     const int64_t SpillOffset =
1973         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1974     int FrameIdx =
1975         MFI.CreateFixedObject(SpillSize, SpillOffset,
1976                               /* IsImmutable */ true, /* IsAliased */ false);
1977     FI->setCRSpillFrameIndex(FrameIdx);
1978   }
1979 }
1980 
1981 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1982                                                        RegScavenger *RS) const {
1983   // Get callee saved register information.
1984   MachineFrameInfo &MFI = MF.getFrameInfo();
1985   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1986 
1987   // If the function is shrink-wrapped, and if the function has a tail call, the
1988   // tail call might not be in the new RestoreBlock, so real branch instruction
1989   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1990   // RestoreBlock. So we handle this case here.
1991   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1992     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1993     for (MachineBasicBlock &MBB : MF) {
1994       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1995         createTailCallBranchInstr(MBB);
1996     }
1997   }
1998 
1999   // Early exit if no callee saved registers are modified!
2000   if (CSI.empty() && !needsFP(MF)) {
2001     addScavengingSpillSlot(MF, RS);
2002     return;
2003   }
2004 
2005   unsigned MinGPR = PPC::R31;
2006   unsigned MinG8R = PPC::X31;
2007   unsigned MinFPR = PPC::F31;
2008   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2009 
2010   bool HasGPSaveArea = false;
2011   bool HasG8SaveArea = false;
2012   bool HasFPSaveArea = false;
2013   bool HasVRSaveArea = false;
2014 
2015   SmallVector<CalleeSavedInfo, 18> GPRegs;
2016   SmallVector<CalleeSavedInfo, 18> G8Regs;
2017   SmallVector<CalleeSavedInfo, 18> FPRegs;
2018   SmallVector<CalleeSavedInfo, 18> VRegs;
2019 
2020   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2021     unsigned Reg = CSI[i].getReg();
2022     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2023             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2024            "Not expecting to try to spill R2 in a function that must save TOC");
2025     if (PPC::GPRCRegClass.contains(Reg)) {
2026       HasGPSaveArea = true;
2027 
2028       GPRegs.push_back(CSI[i]);
2029 
2030       if (Reg < MinGPR) {
2031         MinGPR = Reg;
2032       }
2033     } else if (PPC::G8RCRegClass.contains(Reg)) {
2034       HasG8SaveArea = true;
2035 
2036       G8Regs.push_back(CSI[i]);
2037 
2038       if (Reg < MinG8R) {
2039         MinG8R = Reg;
2040       }
2041     } else if (PPC::F8RCRegClass.contains(Reg)) {
2042       HasFPSaveArea = true;
2043 
2044       FPRegs.push_back(CSI[i]);
2045 
2046       if (Reg < MinFPR) {
2047         MinFPR = Reg;
2048       }
2049     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2050                PPC::CRRCRegClass.contains(Reg)) {
2051       ; // do nothing, as we already know whether CRs are spilled
2052     } else if (PPC::VRRCRegClass.contains(Reg) ||
2053                PPC::SPERCRegClass.contains(Reg)) {
2054       // Altivec and SPE are mutually exclusive, but have the same stack
2055       // alignment requirements, so overload the save area for both cases.
2056       HasVRSaveArea = true;
2057 
2058       VRegs.push_back(CSI[i]);
2059 
2060       if (Reg < MinVR) {
2061         MinVR = Reg;
2062       }
2063     } else {
2064       llvm_unreachable("Unknown RegisterClass!");
2065     }
2066   }
2067 
2068   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2069   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2070 
2071   int64_t LowerBound = 0;
2072 
2073   // Take into account stack space reserved for tail calls.
2074   int TCSPDelta = 0;
2075   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2076       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2077     LowerBound = TCSPDelta;
2078   }
2079 
2080   // The Floating-point register save area is right below the back chain word
2081   // of the previous stack frame.
2082   if (HasFPSaveArea) {
2083     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2084       int FI = FPRegs[i].getFrameIdx();
2085 
2086       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2087     }
2088 
2089     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2090   }
2091 
2092   // Check whether the frame pointer register is allocated. If so, make sure it
2093   // is spilled to the correct offset.
2094   if (needsFP(MF)) {
2095     int FI = PFI->getFramePointerSaveIndex();
2096     assert(FI && "No Frame Pointer Save Slot!");
2097     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2098     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2099     HasGPSaveArea = true;
2100   }
2101 
2102   if (PFI->usesPICBase()) {
2103     int FI = PFI->getPICBasePointerSaveIndex();
2104     assert(FI && "No PIC Base Pointer Save Slot!");
2105     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2106 
2107     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2108     HasGPSaveArea = true;
2109   }
2110 
2111   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2112   if (RegInfo->hasBasePointer(MF)) {
2113     int FI = PFI->getBasePointerSaveIndex();
2114     assert(FI && "No Base Pointer Save Slot!");
2115     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2116 
2117     Register BP = RegInfo->getBaseRegister(MF);
2118     if (PPC::G8RCRegClass.contains(BP)) {
2119       MinG8R = std::min<unsigned>(MinG8R, BP);
2120       HasG8SaveArea = true;
2121     } else if (PPC::GPRCRegClass.contains(BP)) {
2122       MinGPR = std::min<unsigned>(MinGPR, BP);
2123       HasGPSaveArea = true;
2124     }
2125   }
2126 
2127   // General register save area starts right below the Floating-point
2128   // register save area.
2129   if (HasGPSaveArea || HasG8SaveArea) {
2130     // Move general register save area spill slots down, taking into account
2131     // the size of the Floating-point register save area.
2132     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2133       if (!GPRegs[i].isSpilledToReg()) {
2134         int FI = GPRegs[i].getFrameIdx();
2135         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2136       }
2137     }
2138 
2139     // Move general register save area spill slots down, taking into account
2140     // the size of the Floating-point register save area.
2141     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2142       if (!G8Regs[i].isSpilledToReg()) {
2143         int FI = G8Regs[i].getFrameIdx();
2144         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2145       }
2146     }
2147 
2148     unsigned MinReg =
2149       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2150                          TRI->getEncodingValue(MinG8R));
2151 
2152     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2153     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2154   }
2155 
2156   // For 32-bit only, the CR save area is below the general register
2157   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2158   // to the stack pointer and hence does not need an adjustment here.
2159   // Only CR2 (the first nonvolatile spilled) has an associated frame
2160   // index so that we have a single uniform save area.
2161   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2162     // Adjust the frame index of the CR spill slot.
2163     for (const auto &CSInfo : CSI) {
2164       if (CSInfo.getReg() == PPC::CR2) {
2165         int FI = CSInfo.getFrameIdx();
2166         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2167         break;
2168       }
2169     }
2170 
2171     LowerBound -= 4; // The CR save area is always 4 bytes long.
2172   }
2173 
2174   // Both Altivec and SPE have the same alignment and padding requirements
2175   // within the stack frame.
2176   if (HasVRSaveArea) {
2177     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2178     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2179     // we are using negative number here (the stack grows downward). We should
2180     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2181     // is the alignment size ( n = 16 here) and y is the size after aligning.
2182     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2183     LowerBound &= ~(15);
2184 
2185     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2186       int FI = VRegs[i].getFrameIdx();
2187 
2188       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2189     }
2190   }
2191 
2192   addScavengingSpillSlot(MF, RS);
2193 }
2194 
2195 void
2196 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2197                                          RegScavenger *RS) const {
2198   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2199   // a large stack, which will require scavenging a register to materialize a
2200   // large offset.
2201 
2202   // We need to have a scavenger spill slot for spills if the frame size is
2203   // large. In case there is no free register for large-offset addressing,
2204   // this slot is used for the necessary emergency spill. Also, we need the
2205   // slot for dynamic stack allocations.
2206 
2207   // The scavenger might be invoked if the frame offset does not fit into
2208   // the 16-bit immediate. We don't know the complete frame size here
2209   // because we've not yet computed callee-saved register spills or the
2210   // needed alignment padding.
2211   unsigned StackSize = determineFrameLayout(MF, true);
2212   MachineFrameInfo &MFI = MF.getFrameInfo();
2213   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2214       (hasSpills(MF) && !isInt<16>(StackSize))) {
2215     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2216     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2217     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2218     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2219     unsigned Size = TRI.getSpillSize(RC);
2220     Align Alignment = TRI.getSpillAlign(RC);
2221     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2222 
2223     // Might we have over-aligned allocas?
2224     bool HasAlVars =
2225         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2226 
2227     // These kinds of spills might need two registers.
2228     if (spillsCR(MF) || HasAlVars)
2229       RS->addScavengingFrameIndex(
2230           MFI.CreateStackObject(Size, Alignment, false));
2231   }
2232 }
2233 
2234 // This function checks if a callee saved gpr can be spilled to a volatile
2235 // vector register. This occurs for leaf functions when the option
2236 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2237 // which were not spilled to vectors, return false so the target independent
2238 // code can handle them by assigning a FrameIdx to a stack slot.
2239 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2240     MachineFunction &MF, const TargetRegisterInfo *TRI,
2241     std::vector<CalleeSavedInfo> &CSI) const {
2242 
2243   if (CSI.empty())
2244     return true; // Early exit if no callee saved registers are modified!
2245 
2246   // Early exit if cannot spill gprs to volatile vector registers.
2247   MachineFrameInfo &MFI = MF.getFrameInfo();
2248   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2249     return false;
2250 
2251   // Build a BitVector of VSRs that can be used for spilling GPRs.
2252   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2253   BitVector BVCalleeSaved(TRI->getNumRegs());
2254   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2255   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2256   for (unsigned i = 0; CSRegs[i]; ++i)
2257     BVCalleeSaved.set(CSRegs[i]);
2258 
2259   for (unsigned Reg : BVAllocatable.set_bits()) {
2260     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2261     // used in the function.
2262     if (BVCalleeSaved[Reg] ||
2263         (!PPC::F8RCRegClass.contains(Reg) &&
2264          !PPC::VFRCRegClass.contains(Reg)) ||
2265         (MF.getRegInfo().isPhysRegUsed(Reg)))
2266       BVAllocatable.reset(Reg);
2267   }
2268 
2269   bool AllSpilledToReg = true;
2270   for (auto &CS : CSI) {
2271     if (BVAllocatable.none())
2272       return false;
2273 
2274     unsigned Reg = CS.getReg();
2275     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2276       AllSpilledToReg = false;
2277       continue;
2278     }
2279 
2280     unsigned VolatileVFReg = BVAllocatable.find_first();
2281     if (VolatileVFReg < BVAllocatable.size()) {
2282       CS.setDstReg(VolatileVFReg);
2283       BVAllocatable.reset(VolatileVFReg);
2284     } else {
2285       AllSpilledToReg = false;
2286     }
2287   }
2288   return AllSpilledToReg;
2289 }
2290 
2291 bool PPCFrameLowering::spillCalleeSavedRegisters(
2292     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2293     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2294 
2295   MachineFunction *MF = MBB.getParent();
2296   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2297   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2298   bool MustSaveTOC = FI->mustSaveTOC();
2299   DebugLoc DL;
2300   bool CRSpilled = false;
2301   MachineInstrBuilder CRMIB;
2302 
2303   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2304     unsigned Reg = CSI[i].getReg();
2305 
2306     // CR2 through CR4 are the nonvolatile CR fields.
2307     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2308 
2309     // Add the callee-saved register as live-in; it's killed at the spill.
2310     // Do not do this for callee-saved registers that are live-in to the
2311     // function because they will already be marked live-in and this will be
2312     // adding it for a second time. It is an error to add the same register
2313     // to the set more than once.
2314     const MachineRegisterInfo &MRI = MF->getRegInfo();
2315     bool IsLiveIn = MRI.isLiveIn(Reg);
2316     if (!IsLiveIn)
2317        MBB.addLiveIn(Reg);
2318 
2319     if (CRSpilled && IsCRField) {
2320       CRMIB.addReg(Reg, RegState::ImplicitKill);
2321       continue;
2322     }
2323 
2324     // The actual spill will happen in the prologue.
2325     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2326       continue;
2327 
2328     // Insert the spill to the stack frame.
2329     if (IsCRField) {
2330       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2331       if (!Subtarget.is32BitELFABI()) {
2332         // The actual spill will happen at the start of the prologue.
2333         FuncInfo->addMustSaveCR(Reg);
2334       } else {
2335         CRSpilled = true;
2336         FuncInfo->setSpillsCR();
2337 
2338         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2339         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2340         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2341                   .addReg(Reg, RegState::ImplicitKill);
2342 
2343         MBB.insert(MI, CRMIB);
2344         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2345                                          .addReg(PPC::R12,
2346                                                  getKillRegState(true)),
2347                                          CSI[i].getFrameIdx()));
2348       }
2349     } else {
2350       if (CSI[i].isSpilledToReg()) {
2351         NumPESpillVSR++;
2352         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2353           .addReg(Reg, getKillRegState(true));
2354       } else {
2355         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2356         // Use !IsLiveIn for the kill flag.
2357         // We do not want to kill registers that are live in this function
2358         // before their use because they will become undefined registers.
2359         // Functions without NoUnwind need to preserve the order of elements in
2360         // saved vector registers.
2361         if (Subtarget.needsSwapsForVSXMemOps() &&
2362             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2363           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2364                                        CSI[i].getFrameIdx(), RC, TRI);
2365         else
2366           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2367                                   RC, TRI);
2368       }
2369     }
2370   }
2371   return true;
2372 }
2373 
2374 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2375                        bool CR4Spilled, MachineBasicBlock &MBB,
2376                        MachineBasicBlock::iterator MI,
2377                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2378 
2379   MachineFunction *MF = MBB.getParent();
2380   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2381   DebugLoc DL;
2382   unsigned MoveReg = PPC::R12;
2383 
2384   // 32-bit:  FP-relative
2385   MBB.insert(MI,
2386              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2387                                CSI[CSIIndex].getFrameIdx()));
2388 
2389   unsigned RestoreOp = PPC::MTOCRF;
2390   if (CR2Spilled)
2391     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2392                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2393 
2394   if (CR3Spilled)
2395     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2396                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2397 
2398   if (CR4Spilled)
2399     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2400                .addReg(MoveReg, getKillRegState(true)));
2401 }
2402 
2403 MachineBasicBlock::iterator PPCFrameLowering::
2404 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2405                               MachineBasicBlock::iterator I) const {
2406   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2407   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2408       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2409     // Add (actually subtract) back the amount the callee popped on return.
2410     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2411       bool is64Bit = Subtarget.isPPC64();
2412       CalleeAmt *= -1;
2413       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2414       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2415       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2416       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2417       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2418       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2419       const DebugLoc &dl = I->getDebugLoc();
2420 
2421       if (isInt<16>(CalleeAmt)) {
2422         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2423           .addReg(StackReg, RegState::Kill)
2424           .addImm(CalleeAmt);
2425       } else {
2426         MachineBasicBlock::iterator MBBI = I;
2427         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2428           .addImm(CalleeAmt >> 16);
2429         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2430           .addReg(TmpReg, RegState::Kill)
2431           .addImm(CalleeAmt & 0xFFFF);
2432         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2433           .addReg(StackReg, RegState::Kill)
2434           .addReg(TmpReg);
2435       }
2436     }
2437   }
2438   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2439   return MBB.erase(I);
2440 }
2441 
2442 static bool isCalleeSavedCR(unsigned Reg) {
2443   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2444 }
2445 
2446 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2447     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2448     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2449   MachineFunction *MF = MBB.getParent();
2450   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2451   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2452   bool MustSaveTOC = FI->mustSaveTOC();
2453   bool CR2Spilled = false;
2454   bool CR3Spilled = false;
2455   bool CR4Spilled = false;
2456   unsigned CSIIndex = 0;
2457 
2458   // Initialize insertion-point logic; we will be restoring in reverse
2459   // order of spill.
2460   MachineBasicBlock::iterator I = MI, BeforeI = I;
2461   bool AtStart = I == MBB.begin();
2462 
2463   if (!AtStart)
2464     --BeforeI;
2465 
2466   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2467     unsigned Reg = CSI[i].getReg();
2468 
2469     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2470       continue;
2471 
2472     // Restore of callee saved condition register field is handled during
2473     // epilogue insertion.
2474     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2475       continue;
2476 
2477     if (Reg == PPC::CR2) {
2478       CR2Spilled = true;
2479       // The spill slot is associated only with CR2, which is the
2480       // first nonvolatile spilled.  Save it here.
2481       CSIIndex = i;
2482       continue;
2483     } else if (Reg == PPC::CR3) {
2484       CR3Spilled = true;
2485       continue;
2486     } else if (Reg == PPC::CR4) {
2487       CR4Spilled = true;
2488       continue;
2489     } else {
2490       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2491       // least one CR register, restore all spilled CRs together.
2492       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2493         bool is31 = needsFP(*MF);
2494         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2495                    CSIIndex);
2496         CR2Spilled = CR3Spilled = CR4Spilled = false;
2497       }
2498 
2499       if (CSI[i].isSpilledToReg()) {
2500         DebugLoc DL;
2501         NumPEReloadVSR++;
2502         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2503             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2504       } else {
2505        // Default behavior for non-CR saves.
2506         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2507 
2508         // Functions without NoUnwind need to preserve the order of elements in
2509         // saved vector registers.
2510         if (Subtarget.needsSwapsForVSXMemOps() &&
2511             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2512           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2513                                         TRI);
2514         else
2515           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2516 
2517         assert(I != MBB.begin() &&
2518                "loadRegFromStackSlot didn't insert any code!");
2519       }
2520     }
2521 
2522     // Insert in reverse order.
2523     if (AtStart)
2524       I = MBB.begin();
2525     else {
2526       I = BeforeI;
2527       ++I;
2528     }
2529   }
2530 
2531   // If we haven't yet spilled the CRs, do so now.
2532   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2533     assert(Subtarget.is32BitELFABI() &&
2534            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2535     bool is31 = needsFP(*MF);
2536     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2537   }
2538 
2539   return true;
2540 }
2541 
2542 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2543   return TOCSaveOffset;
2544 }
2545 
2546 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2547   return FramePointerSaveOffset;
2548 }
2549 
2550 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2551   return BasePointerSaveOffset;
2552 }
2553 
2554 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2555   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2556     return false;
2557   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2558           MF.getSubtarget<PPCSubtarget>().isPPC64());
2559 }
2560