1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "framelowering"
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36 STATISTIC(NumPrologProbed, "Number of prologues probed");
37 
38 static cl::opt<bool>
39 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
40                      cl::desc("Enable spills in prologue to vector registers."),
41                      cl::init(false), cl::Hidden);
42 
43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF); // No special alignment.
321 
322   // Note: for PPC32 SVR4ABI, we can still generate stackless
323   // code if all local vars are reg-allocated.
324   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
325 
326   // Check whether we can skip adjusting the stack pointer (by using red zone)
327   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
328     // No need for frame
329     return 0;
330   }
331 
332   // Get the maximum call frame size of all the calls.
333   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
334 
335   // Maximum call frame needs to be at least big enough for linkage area.
336   unsigned minCallFrameSize = getLinkageSize();
337   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
338 
339   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
340   // that allocations will be aligned.
341   if (MFI.hasVarSizedObjects())
342     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
343 
344   // Update the new max call frame size if the caller passes in a valid pointer.
345   if (NewMaxCallFrameSize)
346     *NewMaxCallFrameSize = maxCallFrameSize;
347 
348   // Include call frame size in total.
349   FrameSize += maxCallFrameSize;
350 
351   // Make sure the frame is aligned.
352   FrameSize = alignTo(FrameSize, Alignment);
353 
354   return FrameSize;
355 }
356 
357 // hasFP - Return true if the specified function actually has a dedicated frame
358 // pointer register.
359 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
360   const MachineFrameInfo &MFI = MF.getFrameInfo();
361   // FIXME: This is pretty much broken by design: hasFP() might be called really
362   // early, before the stack layout was calculated and thus hasFP() might return
363   // true or false here depending on the time of call.
364   return (MFI.getStackSize()) && needsFP(MF);
365 }
366 
367 // needsFP - Return true if the specified function should have a dedicated frame
368 // pointer register.  This is true if the function has variable sized allocas or
369 // if frame pointer elimination is disabled.
370 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
371   const MachineFrameInfo &MFI = MF.getFrameInfo();
372 
373   // Naked functions have no stack frame pushed, so we don't have a frame
374   // pointer.
375   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
376     return false;
377 
378   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
379          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
380          MF.exposesReturnsTwice() ||
381          (MF.getTarget().Options.GuaranteedTailCallOpt &&
382           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
383 }
384 
385 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
386   bool is31 = needsFP(MF);
387   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
388   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
389 
390   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
391   bool HasBP = RegInfo->hasBasePointer(MF);
392   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
393   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
394 
395   for (MachineBasicBlock &MBB : MF)
396     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
397       --MBBI;
398       for (MachineOperand &MO : MBBI->operands()) {
399         if (!MO.isReg())
400           continue;
401 
402         switch (MO.getReg()) {
403         case PPC::FP:
404           MO.setReg(FPReg);
405           break;
406         case PPC::FP8:
407           MO.setReg(FP8Reg);
408           break;
409         case PPC::BP:
410           MO.setReg(BPReg);
411           break;
412         case PPC::BP8:
413           MO.setReg(BP8Reg);
414           break;
415 
416         }
417       }
418     }
419 }
420 
421 /*  This function will do the following:
422     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
423       respectively (defaults recommended by the ABI) and return true
424     - If MBB is not an entry block, initialize the register scavenger and look
425       for available registers.
426     - If the defaults (R0/R12) are available, return true
427     - If TwoUniqueRegsRequired is set to true, it looks for two unique
428       registers. Otherwise, look for a single available register.
429       - If the required registers are found, set SR1 and SR2 and return true.
430       - If the required registers are not found, set SR2 or both SR1 and SR2 to
431         PPC::NoRegister and return false.
432 
433     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
434     is not set, this function will attempt to find two different registers, but
435     still return true if only one register is available (and set SR1 == SR2).
436 */
437 bool
438 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
439                                       bool UseAtEnd,
440                                       bool TwoUniqueRegsRequired,
441                                       Register *SR1,
442                                       Register *SR2) const {
443   RegScavenger RS;
444   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
445   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
446 
447   // Set the defaults for the two scratch registers.
448   if (SR1)
449     *SR1 = R0;
450 
451   if (SR2) {
452     assert (SR1 && "Asking for the second scratch register but not the first?");
453     *SR2 = R12;
454   }
455 
456   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
457   if ((UseAtEnd && MBB->isReturnBlock()) ||
458       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
459     return true;
460 
461   if (UseAtEnd) {
462     // The scratch register will be used before the first terminator (or at the
463     // end of the block if there are no terminators).
464     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
465     if (MBBI == MBB->begin()) {
466       RS.enterBasicBlock(*MBB);
467     } else {
468       RS.enterBasicBlockEnd(*MBB);
469       RS.backward(MBBI);
470     }
471   } else {
472     // The scratch register will be used at the start of the block.
473     RS.enterBasicBlock(*MBB);
474   }
475 
476   // If the two registers are available, we're all good.
477   // Note that we only return here if both R0 and R12 are available because
478   // although the function may not require two unique registers, it may benefit
479   // from having two so we should try to provide them.
480   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
481     return true;
482 
483   // Get the list of callee-saved registers for the target.
484   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
485   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
486 
487   // Get all the available registers in the block.
488   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
489                                      &PPC::GPRCRegClass);
490 
491   // We shouldn't use callee-saved registers as scratch registers as they may be
492   // available when looking for a candidate block for shrink wrapping but not
493   // available when the actual prologue/epilogue is being emitted because they
494   // were added as live-in to the prologue block by PrologueEpilogueInserter.
495   for (int i = 0; CSRegs[i]; ++i)
496     BV.reset(CSRegs[i]);
497 
498   // Set the first scratch register to the first available one.
499   if (SR1) {
500     int FirstScratchReg = BV.find_first();
501     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
502   }
503 
504   // If there is another one available, set the second scratch register to that.
505   // Otherwise, set it to either PPC::NoRegister if this function requires two
506   // or to whatever SR1 is set to if this function doesn't require two.
507   if (SR2) {
508     int SecondScratchReg = BV.find_next(*SR1);
509     if (SecondScratchReg != -1)
510       *SR2 = SecondScratchReg;
511     else
512       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
513   }
514 
515   // Now that we've done our best to provide both registers, double check
516   // whether we were unable to provide enough.
517   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
518     return false;
519 
520   return true;
521 }
522 
523 // We need a scratch register for spilling LR and for spilling CR. By default,
524 // we use two scratch registers to hide latency. However, if only one scratch
525 // register is available, we can adjust for that by not overlapping the spill
526 // code. However, if we need to realign the stack (i.e. have a base pointer)
527 // and the stack frame is large, we need two scratch registers.
528 // Also, stack probe requires two scratch registers, one for old sp, one for
529 // large frame and large probe size.
530 bool
531 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
532   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
533   MachineFunction &MF = *(MBB->getParent());
534   bool HasBP = RegInfo->hasBasePointer(MF);
535   unsigned FrameSize = determineFrameLayout(MF);
536   int NegFrameSize = -FrameSize;
537   bool IsLargeFrame = !isInt<16>(NegFrameSize);
538   MachineFrameInfo &MFI = MF.getFrameInfo();
539   Align MaxAlign = MFI.getMaxAlign();
540   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
541   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
542 
543   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
544          TLI.hasInlineStackProbe(MF);
545 }
546 
547 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
548   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
549 
550   return findScratchRegister(TmpMBB, false,
551                              twoUniqueScratchRegsRequired(TmpMBB));
552 }
553 
554 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
555   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
556 
557   return findScratchRegister(TmpMBB, true);
558 }
559 
560 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
561   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
562   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
563 
564   // Abort if there is no register info or function info.
565   if (!RegInfo || !FI)
566     return false;
567 
568   // Only move the stack update on ELFv2 ABI and PPC64.
569   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
570     return false;
571 
572   // Check the frame size first and return false if it does not fit the
573   // requirements.
574   // We need a non-zero frame size as well as a frame that will fit in the red
575   // zone. This is because by moving the stack pointer update we are now storing
576   // to the red zone until the stack pointer is updated. If we get an interrupt
577   // inside the prologue but before the stack update we now have a number of
578   // stores to the red zone and those stores must all fit.
579   MachineFrameInfo &MFI = MF.getFrameInfo();
580   unsigned FrameSize = MFI.getStackSize();
581   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
582     return false;
583 
584   // Frame pointers and base pointers complicate matters so don't do anything
585   // if we have them. For example having a frame pointer will sometimes require
586   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
587   // difficult. Similar situation exists with setjmp.
588   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
589     return false;
590 
591   // Calls to fast_cc functions use different rules for passing parameters on
592   // the stack from the ABI and using PIC base in the function imposes
593   // similar restrictions to using the base pointer. It is not generally safe
594   // to move the stack pointer update in these situations.
595   if (FI->hasFastCall() || FI->usesPICBase())
596     return false;
597 
598   // Finally we can move the stack update if we do not require register
599   // scavenging. Register scavenging can introduce more spills and so
600   // may make the frame size larger than we have computed.
601   return !RegInfo->requiresFrameIndexScavenging(MF);
602 }
603 
604 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
605                                     MachineBasicBlock &MBB) const {
606   MachineBasicBlock::iterator MBBI = MBB.begin();
607   MachineFrameInfo &MFI = MF.getFrameInfo();
608   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
609   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
610   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
611 
612   MachineModuleInfo &MMI = MF.getMMI();
613   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
614   DebugLoc dl;
615   // AIX assembler does not support cfi directives.
616   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
617 
618   const bool HasFastMFLR = Subtarget.hasFastMFLR();
619 
620   // Get processor type.
621   bool isPPC64 = Subtarget.isPPC64();
622   // Get the ABI.
623   bool isSVR4ABI = Subtarget.isSVR4ABI();
624   bool isELFv2ABI = Subtarget.isELFv2ABI();
625   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
626 
627   // Work out frame sizes.
628   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
629   int64_t NegFrameSize = -FrameSize;
630   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
631     llvm_unreachable("Unhandled stack size!");
632 
633   if (MFI.isFrameAddressTaken())
634     replaceFPWithRealFP(MF);
635 
636   // Check if the link register (LR) must be saved.
637   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
638   bool MustSaveLR = FI->mustSaveLR();
639   bool MustSaveTOC = FI->mustSaveTOC();
640   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
641   bool MustSaveCR = !MustSaveCRs.empty();
642   // Do we have a frame pointer and/or base pointer for this function?
643   bool HasFP = hasFP(MF);
644   bool HasBP = RegInfo->hasBasePointer(MF);
645   bool HasRedZone = isPPC64 || !isSVR4ABI;
646   bool HasROPProtect = Subtarget.hasROPProtect();
647   bool HasPrivileged = Subtarget.hasPrivileged();
648 
649   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
650   Register BPReg = RegInfo->getBaseRegister(MF);
651   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
652   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
653   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
654   Register ScratchReg;
655   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
656   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
657   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
658                                                 : PPC::MFLR );
659   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
660                                                  : PPC::STW );
661   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
662                                                      : PPC::STWU );
663   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
664                                                         : PPC::STWUX);
665   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
666                                               : PPC::OR );
667   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
668                                                             : PPC::SUBFC);
669   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
670                                                                : PPC::SUBFIC);
671   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
672                                                            : PPC::MFCR);
673   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
674   const MCInstrDesc &HashST =
675       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
676                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
677 
678   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
679   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
680   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
681   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
682   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
683          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
684 
685   // Using the same bool variable as below to suppress compiler warnings.
686   bool SingleScratchReg = findScratchRegister(
687       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
688   assert(SingleScratchReg &&
689          "Required number of registers not available in this block");
690 
691   SingleScratchReg = ScratchReg == TempReg;
692 
693   int64_t LROffset = getReturnSaveOffset();
694 
695   int64_t FPOffset = 0;
696   if (HasFP) {
697     MachineFrameInfo &MFI = MF.getFrameInfo();
698     int FPIndex = FI->getFramePointerSaveIndex();
699     assert(FPIndex && "No Frame Pointer Save Slot!");
700     FPOffset = MFI.getObjectOffset(FPIndex);
701   }
702 
703   int64_t BPOffset = 0;
704   if (HasBP) {
705     MachineFrameInfo &MFI = MF.getFrameInfo();
706     int BPIndex = FI->getBasePointerSaveIndex();
707     assert(BPIndex && "No Base Pointer Save Slot!");
708     BPOffset = MFI.getObjectOffset(BPIndex);
709   }
710 
711   int64_t PBPOffset = 0;
712   if (FI->usesPICBase()) {
713     MachineFrameInfo &MFI = MF.getFrameInfo();
714     int PBPIndex = FI->getPICBasePointerSaveIndex();
715     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
716     PBPOffset = MFI.getObjectOffset(PBPIndex);
717   }
718 
719   // Get stack alignments.
720   Align MaxAlign = MFI.getMaxAlign();
721   if (HasBP && MaxAlign > 1)
722     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
723 
724   // Frames of 32KB & larger require special handling because they cannot be
725   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
726   bool isLargeFrame = !isInt<16>(NegFrameSize);
727 
728   // Check if we can move the stack update instruction (stdu) down the prologue
729   // past the callee saves. Hopefully this will avoid the situation where the
730   // saves are waiting for the update on the store with update to complete.
731   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
732   bool MovingStackUpdateDown = false;
733 
734   // Check if we can move the stack update.
735   if (stackUpdateCanBeMoved(MF)) {
736     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
737     for (CalleeSavedInfo CSI : Info) {
738       // If the callee saved register is spilled to a register instead of the
739       // stack then the spill no longer uses the stack pointer.
740       // This can lead to two consequences:
741       // 1) We no longer need to update the stack because the function does not
742       //    spill any callee saved registers to stack.
743       // 2) We have a situation where we still have to update the stack pointer
744       //    even though some registers are spilled to other registers. In
745       //    this case the current code moves the stack update to an incorrect
746       //    position.
747       // In either case we should abort moving the stack update operation.
748       if (CSI.isSpilledToReg()) {
749         StackUpdateLoc = MBBI;
750         MovingStackUpdateDown = false;
751         break;
752       }
753 
754       int FrIdx = CSI.getFrameIdx();
755       // If the frame index is not negative the callee saved info belongs to a
756       // stack object that is not a fixed stack object. We ignore non-fixed
757       // stack objects because we won't move the stack update pointer past them.
758       if (FrIdx >= 0)
759         continue;
760 
761       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
762         StackUpdateLoc++;
763         MovingStackUpdateDown = true;
764       } else {
765         // We need all of the Frame Indices to meet these conditions.
766         // If they do not, abort the whole operation.
767         StackUpdateLoc = MBBI;
768         MovingStackUpdateDown = false;
769         break;
770       }
771     }
772 
773     // If the operation was not aborted then update the object offset.
774     if (MovingStackUpdateDown) {
775       for (CalleeSavedInfo CSI : Info) {
776         int FrIdx = CSI.getFrameIdx();
777         if (FrIdx < 0)
778           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
779       }
780     }
781   }
782 
783   // Where in the prologue we move the CR fields depends on how many scratch
784   // registers we have, and if we need to save the link register or not. This
785   // lambda is to avoid duplicating the logic in 2 places.
786   auto BuildMoveFromCR = [&]() {
787     if (isELFv2ABI && MustSaveCRs.size() == 1) {
788     // In the ELFv2 ABI, we are not required to save all CR fields.
789     // If only one CR field is clobbered, it is more efficient to use
790     // mfocrf to selectively save just that field, because mfocrf has short
791     // latency compares to mfcr.
792       assert(isPPC64 && "V2 ABI is 64-bit only.");
793       MachineInstrBuilder MIB =
794           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
795       MIB.addReg(MustSaveCRs[0], RegState::Kill);
796     } else {
797       MachineInstrBuilder MIB =
798           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
799       for (unsigned CRfield : MustSaveCRs)
800         MIB.addReg(CRfield, RegState::ImplicitKill);
801     }
802   };
803 
804   // If we need to spill the CR and the LR but we don't have two separate
805   // registers available, we must spill them one at a time
806   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
807     BuildMoveFromCR();
808     BuildMI(MBB, MBBI, dl, StoreWordInst)
809         .addReg(TempReg, getKillRegState(true))
810         .addImm(CRSaveOffset)
811         .addReg(SPReg);
812   }
813 
814   if (MustSaveLR)
815     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
816 
817   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
818     BuildMoveFromCR();
819 
820   if (HasRedZone) {
821     if (HasFP)
822       BuildMI(MBB, MBBI, dl, StoreInst)
823         .addReg(FPReg)
824         .addImm(FPOffset)
825         .addReg(SPReg);
826     if (FI->usesPICBase())
827       BuildMI(MBB, MBBI, dl, StoreInst)
828         .addReg(PPC::R30)
829         .addImm(PBPOffset)
830         .addReg(SPReg);
831     if (HasBP)
832       BuildMI(MBB, MBBI, dl, StoreInst)
833         .addReg(BPReg)
834         .addImm(BPOffset)
835         .addReg(SPReg);
836   }
837 
838   // Generate the instruction to store the LR. In the case where ROP protection
839   // is required the register holding the LR should not be killed as it will be
840   // used by the hash store instruction.
841   auto SaveLR = [&](int64_t Offset) {
842     assert(MustSaveLR && "LR is not required to be saved!");
843     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
844         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
845         .addImm(Offset)
846         .addReg(SPReg);
847 
848     // Add the ROP protection Hash Store instruction.
849     // NOTE: This is technically a violation of the ABI. The hash can be saved
850     // up to 512 bytes into the Protected Zone. This can be outside of the
851     // initial 288 byte volatile program storage region in the Protected Zone.
852     // However, this restriction will be removed in an upcoming revision of the
853     // ABI.
854     if (HasROPProtect) {
855       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
856       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
857       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
858              "ROP hash save offset out of range.");
859       assert(((ImmOffset & 0x7) == 0) &&
860              "ROP hash save offset must be 8 byte aligned.");
861       BuildMI(MBB, StackUpdateLoc, dl, HashST)
862           .addReg(ScratchReg, getKillRegState(true))
863           .addImm(ImmOffset)
864           .addReg(SPReg);
865     }
866   };
867 
868   if (MustSaveLR && HasFastMFLR)
869       SaveLR(LROffset);
870 
871   if (MustSaveCR &&
872       !(SingleScratchReg && MustSaveLR)) {
873     assert(HasRedZone && "A red zone is always available on PPC64");
874     BuildMI(MBB, MBBI, dl, StoreWordInst)
875       .addReg(TempReg, getKillRegState(true))
876       .addImm(CRSaveOffset)
877       .addReg(SPReg);
878   }
879 
880   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
881   if (!FrameSize) {
882     if (MustSaveLR && !HasFastMFLR)
883       SaveLR(LROffset);
884     return;
885   }
886 
887   // Adjust stack pointer: r1 += NegFrameSize.
888   // If there is a preferred stack alignment, align R1 now
889 
890   if (HasBP && HasRedZone) {
891     // Save a copy of r1 as the base pointer.
892     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
893       .addReg(SPReg)
894       .addReg(SPReg);
895   }
896 
897   // Have we generated a STUX instruction to claim stack frame? If so,
898   // the negated frame size will be placed in ScratchReg.
899   bool HasSTUX =
900       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
901       (HasBP && MaxAlign > 1) || isLargeFrame;
902 
903   // If we use STUX to update the stack pointer, we need the two scratch
904   // registers TempReg and ScratchReg, we have to save LR here which is stored
905   // in ScratchReg.
906   // If the offset can not be encoded into the store instruction, we also have
907   // to save LR here.
908   if (MustSaveLR && !HasFastMFLR &&
909       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
910     SaveLR(LROffset);
911 
912   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
913   // pointer is always stored at SP, we will get a free probe due to an essential
914   // STU(X) instruction.
915   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
916     // To be consistent with other targets, a pseudo instruction is emitted and
917     // will be later expanded in `inlineStackProbe`.
918     BuildMI(MBB, MBBI, dl,
919             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
920                             : PPC::PROBED_STACKALLOC_32))
921         .addDef(TempReg)
922         .addDef(ScratchReg) // ScratchReg stores the old sp.
923         .addImm(NegFrameSize);
924     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
925     // update the ScratchReg to meet the assumption that ScratchReg contains
926     // the NegFrameSize. This solution is rather tricky.
927     if (!HasRedZone) {
928       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
929           .addReg(ScratchReg)
930           .addReg(SPReg);
931     }
932   } else {
933     // This condition must be kept in sync with canUseAsPrologue.
934     if (HasBP && MaxAlign > 1) {
935       if (isPPC64)
936         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
937             .addReg(SPReg)
938             .addImm(0)
939             .addImm(64 - Log2(MaxAlign));
940       else // PPC32...
941         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
942             .addReg(SPReg)
943             .addImm(0)
944             .addImm(32 - Log2(MaxAlign))
945             .addImm(31);
946       if (!isLargeFrame) {
947         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
948             .addReg(ScratchReg, RegState::Kill)
949             .addImm(NegFrameSize);
950       } else {
951         assert(!SingleScratchReg && "Only a single scratch reg available");
952         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
953         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
954             .addReg(ScratchReg, RegState::Kill)
955             .addReg(TempReg, RegState::Kill);
956       }
957 
958       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
959           .addReg(SPReg, RegState::Kill)
960           .addReg(SPReg)
961           .addReg(ScratchReg);
962     } else if (!isLargeFrame) {
963       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
964           .addReg(SPReg)
965           .addImm(NegFrameSize)
966           .addReg(SPReg);
967     } else {
968       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
969       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
970           .addReg(SPReg, RegState::Kill)
971           .addReg(SPReg)
972           .addReg(ScratchReg);
973     }
974   }
975 
976   // Save the TOC register after the stack pointer update if a prologue TOC
977   // save is required for the function.
978   if (MustSaveTOC) {
979     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
980     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
981       .addReg(TOCReg, getKillRegState(true))
982       .addImm(TOCSaveOffset)
983       .addReg(SPReg);
984   }
985 
986   if (!HasRedZone) {
987     assert(!isPPC64 && "A red zone is always available on PPC64");
988     if (HasSTUX) {
989       // The negated frame size is in ScratchReg, and the SPReg has been
990       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
991       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
992       // the stack frame (i.e. the old SP), ideally, we would put the old
993       // SP into a register and use it as the base for the stores. The
994       // problem is that the only available register may be ScratchReg,
995       // which could be R0, and R0 cannot be used as a base address.
996 
997       // First, set ScratchReg to the old SP. This may need to be modified
998       // later.
999       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1000         .addReg(ScratchReg, RegState::Kill)
1001         .addReg(SPReg);
1002 
1003       if (ScratchReg == PPC::R0) {
1004         // R0 cannot be used as a base register, but it can be used as an
1005         // index in a store-indexed.
1006         int LastOffset = 0;
1007         if (HasFP)  {
1008           // R0 += (FPOffset-LastOffset).
1009           // Need addic, since addi treats R0 as 0.
1010           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1011             .addReg(ScratchReg)
1012             .addImm(FPOffset-LastOffset);
1013           LastOffset = FPOffset;
1014           // Store FP into *R0.
1015           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1016             .addReg(FPReg, RegState::Kill)  // Save FP.
1017             .addReg(PPC::ZERO)
1018             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1019         }
1020         if (FI->usesPICBase()) {
1021           // R0 += (PBPOffset-LastOffset).
1022           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1023             .addReg(ScratchReg)
1024             .addImm(PBPOffset-LastOffset);
1025           LastOffset = PBPOffset;
1026           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1027             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1028             .addReg(PPC::ZERO)
1029             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1030         }
1031         if (HasBP) {
1032           // R0 += (BPOffset-LastOffset).
1033           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1034             .addReg(ScratchReg)
1035             .addImm(BPOffset-LastOffset);
1036           LastOffset = BPOffset;
1037           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1038             .addReg(BPReg, RegState::Kill)  // Save BP.
1039             .addReg(PPC::ZERO)
1040             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1041           // BP = R0-LastOffset
1042           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1043             .addReg(ScratchReg, RegState::Kill)
1044             .addImm(-LastOffset);
1045         }
1046       } else {
1047         // ScratchReg is not R0, so use it as the base register. It is
1048         // already set to the old SP, so we can use the offsets directly.
1049 
1050         // Now that the stack frame has been allocated, save all the necessary
1051         // registers using ScratchReg as the base address.
1052         if (HasFP)
1053           BuildMI(MBB, MBBI, dl, StoreInst)
1054             .addReg(FPReg)
1055             .addImm(FPOffset)
1056             .addReg(ScratchReg);
1057         if (FI->usesPICBase())
1058           BuildMI(MBB, MBBI, dl, StoreInst)
1059             .addReg(PPC::R30)
1060             .addImm(PBPOffset)
1061             .addReg(ScratchReg);
1062         if (HasBP) {
1063           BuildMI(MBB, MBBI, dl, StoreInst)
1064             .addReg(BPReg)
1065             .addImm(BPOffset)
1066             .addReg(ScratchReg);
1067           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1068             .addReg(ScratchReg, RegState::Kill)
1069             .addReg(ScratchReg);
1070         }
1071       }
1072     } else {
1073       // The frame size is a known 16-bit constant (fitting in the immediate
1074       // field of STWU). To be here we have to be compiling for PPC32.
1075       // Since the SPReg has been decreased by FrameSize, add it back to each
1076       // offset.
1077       if (HasFP)
1078         BuildMI(MBB, MBBI, dl, StoreInst)
1079           .addReg(FPReg)
1080           .addImm(FrameSize + FPOffset)
1081           .addReg(SPReg);
1082       if (FI->usesPICBase())
1083         BuildMI(MBB, MBBI, dl, StoreInst)
1084           .addReg(PPC::R30)
1085           .addImm(FrameSize + PBPOffset)
1086           .addReg(SPReg);
1087       if (HasBP) {
1088         BuildMI(MBB, MBBI, dl, StoreInst)
1089           .addReg(BPReg)
1090           .addImm(FrameSize + BPOffset)
1091           .addReg(SPReg);
1092         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1093           .addReg(SPReg)
1094           .addImm(FrameSize);
1095       }
1096     }
1097   }
1098 
1099   // Save the LR now.
1100   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1101     SaveLR(LROffset + FrameSize);
1102 
1103   // Add Call Frame Information for the instructions we generated above.
1104   if (needsCFI) {
1105     unsigned CFIIndex;
1106 
1107     if (HasBP) {
1108       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1109       // because if the stack needed aligning then CFA won't be at a fixed
1110       // offset from FP/SP.
1111       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1112       CFIIndex = MF.addFrameInst(
1113           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1114     } else {
1115       // Adjust the definition of CFA to account for the change in SP.
1116       assert(NegFrameSize);
1117       CFIIndex = MF.addFrameInst(
1118           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1119     }
1120     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1121         .addCFIIndex(CFIIndex);
1122 
1123     if (HasFP) {
1124       // Describe where FP was saved, at a fixed offset from CFA.
1125       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1126       CFIIndex = MF.addFrameInst(
1127           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1128       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1129           .addCFIIndex(CFIIndex);
1130     }
1131 
1132     if (FI->usesPICBase()) {
1133       // Describe where FP was saved, at a fixed offset from CFA.
1134       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1135       CFIIndex = MF.addFrameInst(
1136           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1137       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138           .addCFIIndex(CFIIndex);
1139     }
1140 
1141     if (HasBP) {
1142       // Describe where BP was saved, at a fixed offset from CFA.
1143       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1144       CFIIndex = MF.addFrameInst(
1145           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1146       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1147           .addCFIIndex(CFIIndex);
1148     }
1149 
1150     if (MustSaveLR) {
1151       // Describe where LR was saved, at a fixed offset from CFA.
1152       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1153       CFIIndex = MF.addFrameInst(
1154           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1155       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1156           .addCFIIndex(CFIIndex);
1157     }
1158   }
1159 
1160   // If there is a frame pointer, copy R1 into R31
1161   if (HasFP) {
1162     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1163       .addReg(SPReg)
1164       .addReg(SPReg);
1165 
1166     if (!HasBP && needsCFI) {
1167       // Change the definition of CFA from SP+offset to FP+offset, because SP
1168       // will change at every alloca.
1169       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1170       unsigned CFIIndex = MF.addFrameInst(
1171           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1172 
1173       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1174           .addCFIIndex(CFIIndex);
1175     }
1176   }
1177 
1178   if (needsCFI) {
1179     // Describe where callee saved registers were saved, at fixed offsets from
1180     // CFA.
1181     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1182     for (const CalleeSavedInfo &I : CSI) {
1183       Register Reg = I.getReg();
1184       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1185 
1186       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1187       // subregisters of CR2. We just need to emit a move of CR2.
1188       if (PPC::CRBITRCRegClass.contains(Reg))
1189         continue;
1190 
1191       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1192         continue;
1193 
1194       // For SVR4, don't emit a move for the CR spill slot if we haven't
1195       // spilled CRs.
1196       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1197           && !MustSaveCR)
1198         continue;
1199 
1200       // For 64-bit SVR4 when we have spilled CRs, the spill location
1201       // is SP+8, not a frame-relative slot.
1202       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1203         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1204         // the whole CR word.  In the ELFv2 ABI, every CR that was
1205         // actually saved gets its own CFI record.
1206         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1207         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1208             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1209         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1210             .addCFIIndex(CFIIndex);
1211         continue;
1212       }
1213 
1214       if (I.isSpilledToReg()) {
1215         unsigned SpilledReg = I.getDstReg();
1216         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1217             nullptr, MRI->getDwarfRegNum(Reg, true),
1218             MRI->getDwarfRegNum(SpilledReg, true)));
1219         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1220           .addCFIIndex(CFIRegister);
1221       } else {
1222         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1223         // We have changed the object offset above but we do not want to change
1224         // the actual offsets in the CFI instruction so we have to undo the
1225         // offset change here.
1226         if (MovingStackUpdateDown)
1227           Offset -= NegFrameSize;
1228 
1229         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1230             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1231         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1232             .addCFIIndex(CFIIndex);
1233       }
1234     }
1235   }
1236 }
1237 
1238 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1239                                         MachineBasicBlock &PrologMBB) const {
1240   bool isPPC64 = Subtarget.isPPC64();
1241   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1242   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1243   MachineFrameInfo &MFI = MF.getFrameInfo();
1244   MachineModuleInfo &MMI = MF.getMMI();
1245   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1246   // AIX assembler does not support cfi directives.
1247   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1248   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1249     int Opc = MI.getOpcode();
1250     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1251   });
1252   if (StackAllocMIPos == PrologMBB.end())
1253     return;
1254   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1255   MachineBasicBlock *CurrentMBB = &PrologMBB;
1256   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1257   MachineInstr &MI = *StackAllocMIPos;
1258   int64_t NegFrameSize = MI.getOperand(2).getImm();
1259   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1260   int64_t NegProbeSize = -(int64_t)ProbeSize;
1261   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1262   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1263   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1264   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1265   Register ScratchReg = MI.getOperand(0).getReg();
1266   Register FPReg = MI.getOperand(1).getReg();
1267   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1268   bool HasBP = RegInfo->hasBasePointer(MF);
1269   Register BPReg = RegInfo->getBaseRegister(MF);
1270   Align MaxAlign = MFI.getMaxAlign();
1271   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1272   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1273   // Subroutines to generate .cfi_* directives.
1274   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1275                             MachineBasicBlock::iterator MBBI, Register Reg) {
1276     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1277     unsigned CFIIndex = MF.addFrameInst(
1278         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1279     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1280         .addCFIIndex(CFIIndex);
1281   };
1282   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1283                          MachineBasicBlock::iterator MBBI, Register Reg,
1284                          int Offset) {
1285     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1286     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1287         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1288     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1289         .addCFIIndex(CFIIndex);
1290   };
1291   // Subroutine to determine if we can use the Imm as part of d-form.
1292   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1293   // Subroutine to materialize the Imm into TempReg.
1294   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1295                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1296                             Register &TempReg) {
1297     assert(isInt<32>(Imm) && "Unhandled imm");
1298     if (isInt<16>(Imm))
1299       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1300           .addImm(Imm);
1301     else {
1302       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1303           .addImm(Imm >> 16);
1304       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1305           .addReg(TempReg)
1306           .addImm(Imm & 0xFFFF);
1307     }
1308   };
1309   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1310   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1311                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1312                               Register NegSizeReg, bool UseDForm,
1313                               Register StoreReg) {
1314     if (UseDForm)
1315       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1316           .addReg(StoreReg)
1317           .addImm(NegSize)
1318           .addReg(SPReg);
1319     else
1320       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1321           .addReg(StoreReg)
1322           .addReg(SPReg)
1323           .addReg(NegSizeReg);
1324   };
1325   // Used to probe stack when realignment is required.
1326   // Note that, according to ABI's requirement, *sp must always equals the
1327   // value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
1328   // Following is pseudo code:
1329   // final_sp = (sp & align) + negframesize;
1330   // neg_gap = final_sp - sp;
1331   // while (neg_gap < negprobesize) {
1332   //   stdu fp, negprobesize(sp);
1333   //   neg_gap -= negprobesize;
1334   // }
1335   // stdux fp, sp, neg_gap
1336   //
1337   // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
1338   // before probe code, we don't need to save it, so we get one additional reg
1339   // that can be used to materialize the probeside if needed to use xform.
1340   // Otherwise, we can NOT materialize probeside, so we can only use Dform for
1341   // now.
1342   //
1343   // The allocations are:
1344   // if (HasBP && HasRedzone) {
1345   //   r0: materialize the probesize if needed so that we can use xform.
1346   //   r12: `neg_gap`
1347   // } else {
1348   //   r0: back-chain pointer
1349   //   r12: `neg_gap`.
1350   // }
1351   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1352                                  MachineBasicBlock::iterator MBBI,
1353                                  Register ScratchReg, Register TempReg) {
1354     assert(HasBP && "The function is supposed to have base pointer when its "
1355                     "stack is realigned.");
1356     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1357 
1358     // FIXME: We can eliminate this limitation if we get more infomation about
1359     // which part of redzone are already used. Used redzone can be treated
1360     // probed. But there might be `holes' in redzone probed, this could
1361     // complicate the implementation.
1362     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1363            "Probe size should be larger or equal to the size of red-zone so "
1364            "that red-zone is not clobbered by probing.");
1365 
1366     Register &FinalStackPtr = TempReg;
1367     // FIXME: We only support NegProbeSize materializable by DForm currently.
1368     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1369     // register.
1370     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1371     assert(isInt<16>(NegProbeSize) &&
1372            "NegProbeSize should be materializable by DForm");
1373     Register CRReg = PPC::CR0;
1374     // Layout of output assembly kinda like:
1375     // bb.0:
1376     //   ...
1377     //   sub $scratchreg, $finalsp, r1
1378     //   cmpdi $scratchreg, <negprobesize>
1379     //   bge bb.2
1380     // bb.1:
1381     //   stdu <backchain>, <negprobesize>(r1)
1382     //   sub $scratchreg, $scratchreg, negprobesize
1383     //   cmpdi $scratchreg, <negprobesize>
1384     //   blt bb.1
1385     // bb.2:
1386     //   stdux <backchain>, r1, $scratchreg
1387     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1388     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1389     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1390     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1391     MF.insert(MBBInsertPoint, ProbeExitMBB);
1392     // bb.2
1393     {
1394       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1395       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1396                        BackChainPointer);
1397       if (HasRedZone)
1398         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1399         // to TempReg to satisfy it.
1400         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1401             .addReg(BPReg)
1402             .addReg(BPReg);
1403       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1404       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1405     }
1406     // bb.0
1407     {
1408       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1409           .addReg(SPReg)
1410           .addReg(FinalStackPtr);
1411       if (!HasRedZone)
1412         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1413       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1414           .addReg(ScratchReg)
1415           .addImm(NegProbeSize);
1416       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1417           .addImm(PPC::PRED_GE)
1418           .addReg(CRReg)
1419           .addMBB(ProbeExitMBB);
1420       MBB.addSuccessor(ProbeLoopBodyMBB);
1421       MBB.addSuccessor(ProbeExitMBB);
1422     }
1423     // bb.1
1424     {
1425       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1426       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1427                        0, true /*UseDForm*/, BackChainPointer);
1428       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1429               ScratchReg)
1430           .addReg(ScratchReg)
1431           .addImm(-NegProbeSize);
1432       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1433               CRReg)
1434           .addReg(ScratchReg)
1435           .addImm(NegProbeSize);
1436       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1437           .addImm(PPC::PRED_LT)
1438           .addReg(CRReg)
1439           .addMBB(ProbeLoopBodyMBB);
1440       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1441       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1442     }
1443     // Update liveins.
1444     recomputeLiveIns(*ProbeLoopBodyMBB);
1445     recomputeLiveIns(*ProbeExitMBB);
1446     return ProbeExitMBB;
1447   };
1448   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1449   // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
1450   // the offset subtracted from SP is determined by SP's runtime value.
1451   if (HasBP && MaxAlign > 1) {
1452     // Calculate final stack pointer.
1453     if (isPPC64)
1454       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1455           .addReg(SPReg)
1456           .addImm(0)
1457           .addImm(64 - Log2(MaxAlign));
1458     else
1459       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1460           .addReg(SPReg)
1461           .addImm(0)
1462           .addImm(32 - Log2(MaxAlign))
1463           .addImm(31);
1464     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1465             FPReg)
1466         .addReg(ScratchReg)
1467         .addReg(SPReg);
1468     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1469     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1470             FPReg)
1471         .addReg(ScratchReg)
1472         .addReg(FPReg);
1473     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1474     if (needsCFI)
1475       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1476   } else {
1477     // Initialize current frame pointer.
1478     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1479     // Use FPReg to calculate CFA.
1480     if (needsCFI)
1481       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1482     // Probe residual part.
1483     if (NegResidualSize) {
1484       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1485       if (!ResidualUseDForm)
1486         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1487       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1488                        ResidualUseDForm, FPReg);
1489     }
1490     bool UseDForm = CanUseDForm(NegProbeSize);
1491     // If number of blocks is small, just probe them directly.
1492     if (NumBlocks < 3) {
1493       if (!UseDForm)
1494         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1495       for (int i = 0; i < NumBlocks; ++i)
1496         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1497                          FPReg);
1498       if (needsCFI) {
1499         // Restore using SPReg to calculate CFA.
1500         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1501       }
1502     } else {
1503       // Since CTR is a volatile register and current shrinkwrap implementation
1504       // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1505       // CTR loop to probe.
1506       // Calculate trip count and stores it in CTRReg.
1507       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1508       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1509           .addReg(ScratchReg, RegState::Kill);
1510       if (!UseDForm)
1511         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1512       // Create MBBs of the loop.
1513       MachineFunction::iterator MBBInsertPoint =
1514           std::next(CurrentMBB->getIterator());
1515       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1516       MF.insert(MBBInsertPoint, LoopMBB);
1517       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1518       MF.insert(MBBInsertPoint, ExitMBB);
1519       // Synthesize the loop body.
1520       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1521                        UseDForm, FPReg);
1522       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1523           .addMBB(LoopMBB);
1524       LoopMBB->addSuccessor(ExitMBB);
1525       LoopMBB->addSuccessor(LoopMBB);
1526       // Synthesize the exit MBB.
1527       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1528                       std::next(MachineBasicBlock::iterator(MI)),
1529                       CurrentMBB->end());
1530       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1531       CurrentMBB->addSuccessor(LoopMBB);
1532       if (needsCFI) {
1533         // Restore using SPReg to calculate CFA.
1534         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1535       }
1536       // Update liveins.
1537       recomputeLiveIns(*LoopMBB);
1538       recomputeLiveIns(*ExitMBB);
1539     }
1540   }
1541   ++NumPrologProbed;
1542   MI.eraseFromParent();
1543 }
1544 
1545 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1546                                     MachineBasicBlock &MBB) const {
1547   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1548   DebugLoc dl;
1549 
1550   if (MBBI != MBB.end())
1551     dl = MBBI->getDebugLoc();
1552 
1553   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1554   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1555 
1556   // Get alignment info so we know how to restore the SP.
1557   const MachineFrameInfo &MFI = MF.getFrameInfo();
1558 
1559   // Get the number of bytes allocated from the FrameInfo.
1560   int64_t FrameSize = MFI.getStackSize();
1561 
1562   // Get processor type.
1563   bool isPPC64 = Subtarget.isPPC64();
1564 
1565   // Check if the link register (LR) has been saved.
1566   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1567   bool MustSaveLR = FI->mustSaveLR();
1568   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1569   bool MustSaveCR = !MustSaveCRs.empty();
1570   // Do we have a frame pointer and/or base pointer for this function?
1571   bool HasFP = hasFP(MF);
1572   bool HasBP = RegInfo->hasBasePointer(MF);
1573   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1574   bool HasROPProtect = Subtarget.hasROPProtect();
1575   bool HasPrivileged = Subtarget.hasPrivileged();
1576 
1577   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1578   Register BPReg = RegInfo->getBaseRegister(MF);
1579   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1580   Register ScratchReg;
1581   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1582   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1583                                                  : PPC::MTLR );
1584   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1585                                                  : PPC::LWZ );
1586   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1587                                                            : PPC::LIS );
1588   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1589                                               : PPC::OR );
1590   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1591                                                   : PPC::ORI );
1592   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1593                                                    : PPC::ADDI );
1594   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1595                                                 : PPC::ADD4 );
1596   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1597                                                      : PPC::LWZ);
1598   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1599                                                      : PPC::MTOCRF);
1600   const MCInstrDesc &HashChk =
1601       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1602                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1603   int64_t LROffset = getReturnSaveOffset();
1604 
1605   int64_t FPOffset = 0;
1606 
1607   // Using the same bool variable as below to suppress compiler warnings.
1608   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1609                                               &TempReg);
1610   assert(SingleScratchReg &&
1611          "Could not find an available scratch register");
1612 
1613   SingleScratchReg = ScratchReg == TempReg;
1614 
1615   if (HasFP) {
1616     int FPIndex = FI->getFramePointerSaveIndex();
1617     assert(FPIndex && "No Frame Pointer Save Slot!");
1618     FPOffset = MFI.getObjectOffset(FPIndex);
1619   }
1620 
1621   int64_t BPOffset = 0;
1622   if (HasBP) {
1623       int BPIndex = FI->getBasePointerSaveIndex();
1624       assert(BPIndex && "No Base Pointer Save Slot!");
1625       BPOffset = MFI.getObjectOffset(BPIndex);
1626   }
1627 
1628   int64_t PBPOffset = 0;
1629   if (FI->usesPICBase()) {
1630     int PBPIndex = FI->getPICBasePointerSaveIndex();
1631     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1632     PBPOffset = MFI.getObjectOffset(PBPIndex);
1633   }
1634 
1635   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1636 
1637   if (IsReturnBlock) {
1638     unsigned RetOpcode = MBBI->getOpcode();
1639     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1640                       RetOpcode == PPC::TCRETURNdi ||
1641                       RetOpcode == PPC::TCRETURNai ||
1642                       RetOpcode == PPC::TCRETURNri8 ||
1643                       RetOpcode == PPC::TCRETURNdi8 ||
1644                       RetOpcode == PPC::TCRETURNai8;
1645 
1646     if (UsesTCRet) {
1647       int MaxTCRetDelta = FI->getTailCallSPDelta();
1648       MachineOperand &StackAdjust = MBBI->getOperand(1);
1649       assert(StackAdjust.isImm() && "Expecting immediate value.");
1650       // Adjust stack pointer.
1651       int StackAdj = StackAdjust.getImm();
1652       int Delta = StackAdj - MaxTCRetDelta;
1653       assert((Delta >= 0) && "Delta must be positive");
1654       if (MaxTCRetDelta>0)
1655         FrameSize += (StackAdj +Delta);
1656       else
1657         FrameSize += StackAdj;
1658     }
1659   }
1660 
1661   // Frames of 32KB & larger require special handling because they cannot be
1662   // indexed into with a simple LD/LWZ immediate offset operand.
1663   bool isLargeFrame = !isInt<16>(FrameSize);
1664 
1665   // On targets without red zone, the SP needs to be restored last, so that
1666   // all live contents of the stack frame are upwards of the SP. This means
1667   // that we cannot restore SP just now, since there may be more registers
1668   // to restore from the stack frame (e.g. R31). If the frame size is not
1669   // a simple immediate value, we will need a spare register to hold the
1670   // restored SP. If the frame size is known and small, we can simply adjust
1671   // the offsets of the registers to be restored, and still use SP to restore
1672   // them. In such case, the final update of SP will be to add the frame
1673   // size to it.
1674   // To simplify the code, set RBReg to the base register used to restore
1675   // values from the stack, and set SPAdd to the value that needs to be added
1676   // to the SP at the end. The default values are as if red zone was present.
1677   unsigned RBReg = SPReg;
1678   uint64_t SPAdd = 0;
1679 
1680   // Check if we can move the stack update instruction up the epilogue
1681   // past the callee saves. This will allow the move to LR instruction
1682   // to be executed before the restores of the callee saves which means
1683   // that the callee saves can hide the latency from the MTLR instrcution.
1684   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1685   if (stackUpdateCanBeMoved(MF)) {
1686     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1687     for (CalleeSavedInfo CSI : Info) {
1688       // If the callee saved register is spilled to another register abort the
1689       // stack update movement.
1690       if (CSI.isSpilledToReg()) {
1691         StackUpdateLoc = MBBI;
1692         break;
1693       }
1694       int FrIdx = CSI.getFrameIdx();
1695       // If the frame index is not negative the callee saved info belongs to a
1696       // stack object that is not a fixed stack object. We ignore non-fixed
1697       // stack objects because we won't move the update of the stack pointer
1698       // past them.
1699       if (FrIdx >= 0)
1700         continue;
1701 
1702       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1703         StackUpdateLoc--;
1704       else {
1705         // Abort the operation as we can't update all CSR restores.
1706         StackUpdateLoc = MBBI;
1707         break;
1708       }
1709     }
1710   }
1711 
1712   if (FrameSize) {
1713     // In the prologue, the loaded (or persistent) stack pointer value is
1714     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1715     // zone add this offset back now.
1716 
1717     // If the function has a base pointer, the stack pointer has been copied
1718     // to it so we can restore it by copying in the other direction.
1719     if (HasRedZone && HasBP) {
1720       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1721         addReg(BPReg).
1722         addReg(BPReg);
1723     }
1724     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1725     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1726     // call which invalidates the stack pointer value in SP(0). So we use the
1727     // value of R31 in this case. Similar situation exists with setjmp.
1728     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1729       assert(HasFP && "Expecting a valid frame pointer.");
1730       if (!HasRedZone)
1731         RBReg = FPReg;
1732       if (!isLargeFrame) {
1733         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1734           .addReg(FPReg).addImm(FrameSize);
1735       } else {
1736         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1737         BuildMI(MBB, MBBI, dl, AddInst)
1738           .addReg(RBReg)
1739           .addReg(FPReg)
1740           .addReg(ScratchReg);
1741       }
1742     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1743       if (HasRedZone) {
1744         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1745           .addReg(SPReg)
1746           .addImm(FrameSize);
1747       } else {
1748         // Make sure that adding FrameSize will not overflow the max offset
1749         // size.
1750         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1751                "Local offsets should be negative");
1752         SPAdd = FrameSize;
1753         FPOffset += FrameSize;
1754         BPOffset += FrameSize;
1755         PBPOffset += FrameSize;
1756       }
1757     } else {
1758       // We don't want to use ScratchReg as a base register, because it
1759       // could happen to be R0. Use FP instead, but make sure to preserve it.
1760       if (!HasRedZone) {
1761         // If FP is not saved, copy it to ScratchReg.
1762         if (!HasFP)
1763           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1764             .addReg(FPReg)
1765             .addReg(FPReg);
1766         RBReg = FPReg;
1767       }
1768       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1769         .addImm(0)
1770         .addReg(SPReg);
1771     }
1772   }
1773   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1774   // If there is no red zone, ScratchReg may be needed for holding a useful
1775   // value (although not the base register). Make sure it is not overwritten
1776   // too early.
1777 
1778   // If we need to restore both the LR and the CR and we only have one
1779   // available scratch register, we must do them one at a time.
1780   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1781     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1782     // is live here.
1783     assert(HasRedZone && "Expecting red zone");
1784     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1785       .addImm(CRSaveOffset)
1786       .addReg(SPReg);
1787     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1788       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1789         .addReg(TempReg, getKillRegState(i == e-1));
1790   }
1791 
1792   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1793   // LR is stored in the caller's stack frame. ScratchReg will be needed
1794   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1795   // a base register anyway, because it may happen to be R0.
1796   bool LoadedLR = false;
1797   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1798     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1799       .addImm(LROffset+SPAdd)
1800       .addReg(RBReg);
1801     LoadedLR = true;
1802   }
1803 
1804   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1805     assert(RBReg == SPReg && "Should be using SP as a base register");
1806     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1807       .addImm(CRSaveOffset)
1808       .addReg(RBReg);
1809   }
1810 
1811   if (HasFP) {
1812     // If there is red zone, restore FP directly, since SP has already been
1813     // restored. Otherwise, restore the value of FP into ScratchReg.
1814     if (HasRedZone || RBReg == SPReg)
1815       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1816         .addImm(FPOffset)
1817         .addReg(SPReg);
1818     else
1819       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1820         .addImm(FPOffset)
1821         .addReg(RBReg);
1822   }
1823 
1824   if (FI->usesPICBase())
1825     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1826       .addImm(PBPOffset)
1827       .addReg(RBReg);
1828 
1829   if (HasBP)
1830     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1831       .addImm(BPOffset)
1832       .addReg(RBReg);
1833 
1834   // There is nothing more to be loaded from the stack, so now we can
1835   // restore SP: SP = RBReg + SPAdd.
1836   if (RBReg != SPReg || SPAdd != 0) {
1837     assert(!HasRedZone && "This should not happen with red zone");
1838     // If SPAdd is 0, generate a copy.
1839     if (SPAdd == 0)
1840       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1841         .addReg(RBReg)
1842         .addReg(RBReg);
1843     else
1844       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1845         .addReg(RBReg)
1846         .addImm(SPAdd);
1847 
1848     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1849     if (RBReg == FPReg)
1850       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1851         .addReg(ScratchReg)
1852         .addReg(ScratchReg);
1853 
1854     // Now load the LR from the caller's stack frame.
1855     if (MustSaveLR && !LoadedLR)
1856       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1857         .addImm(LROffset)
1858         .addReg(SPReg);
1859   }
1860 
1861   if (MustSaveCR &&
1862       !(SingleScratchReg && MustSaveLR))
1863     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1864       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1865         .addReg(TempReg, getKillRegState(i == e-1));
1866 
1867   if (MustSaveLR) {
1868     // If ROP protection is required, an extra instruction is added to compute a
1869     // hash and then compare it to the hash stored in the prologue.
1870     if (HasROPProtect) {
1871       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1872       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1873       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1874              "ROP hash check location offset out of range.");
1875       assert(((ImmOffset & 0x7) == 0) &&
1876              "ROP hash check location offset must be 8 byte aligned.");
1877       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1878           .addReg(ScratchReg)
1879           .addImm(ImmOffset)
1880           .addReg(SPReg);
1881     }
1882     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1883   }
1884 
1885   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1886   // call optimization
1887   if (IsReturnBlock) {
1888     unsigned RetOpcode = MBBI->getOpcode();
1889     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1890         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1891         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1892       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1893       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1894 
1895       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1896         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1897           .addReg(SPReg).addImm(CallerAllocatedAmt);
1898       } else {
1899         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1900           .addImm(CallerAllocatedAmt >> 16);
1901         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1902           .addReg(ScratchReg, RegState::Kill)
1903           .addImm(CallerAllocatedAmt & 0xFFFF);
1904         BuildMI(MBB, MBBI, dl, AddInst)
1905           .addReg(SPReg)
1906           .addReg(FPReg)
1907           .addReg(ScratchReg);
1908       }
1909     } else {
1910       createTailCallBranchInstr(MBB);
1911     }
1912   }
1913 }
1914 
1915 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1916   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1917 
1918   // If we got this far a first terminator should exist.
1919   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1920 
1921   DebugLoc dl = MBBI->getDebugLoc();
1922   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1923 
1924   // Create branch instruction for pseudo tail call return instruction.
1925   // The TCRETURNdi variants are direct calls. Valid targets for those are
1926   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1927   // since we can tail call external functions with PC-Rel (i.e. we don't need
1928   // to worry about different TOC pointers). Some of the external functions will
1929   // be MO_GlobalAddress while others like memcpy for example, are going to
1930   // be MO_ExternalSymbol.
1931   unsigned RetOpcode = MBBI->getOpcode();
1932   if (RetOpcode == PPC::TCRETURNdi) {
1933     MBBI = MBB.getLastNonDebugInstr();
1934     MachineOperand &JumpTarget = MBBI->getOperand(0);
1935     if (JumpTarget.isGlobal())
1936       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1937         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1938     else if (JumpTarget.isSymbol())
1939       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1940         addExternalSymbol(JumpTarget.getSymbolName());
1941     else
1942       llvm_unreachable("Expecting Global or External Symbol");
1943   } else if (RetOpcode == PPC::TCRETURNri) {
1944     MBBI = MBB.getLastNonDebugInstr();
1945     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1946     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1947   } else if (RetOpcode == PPC::TCRETURNai) {
1948     MBBI = MBB.getLastNonDebugInstr();
1949     MachineOperand &JumpTarget = MBBI->getOperand(0);
1950     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1951   } else if (RetOpcode == PPC::TCRETURNdi8) {
1952     MBBI = MBB.getLastNonDebugInstr();
1953     MachineOperand &JumpTarget = MBBI->getOperand(0);
1954     if (JumpTarget.isGlobal())
1955       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1956         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1957     else if (JumpTarget.isSymbol())
1958       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1959         addExternalSymbol(JumpTarget.getSymbolName());
1960     else
1961       llvm_unreachable("Expecting Global or External Symbol");
1962   } else if (RetOpcode == PPC::TCRETURNri8) {
1963     MBBI = MBB.getLastNonDebugInstr();
1964     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1965     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1966   } else if (RetOpcode == PPC::TCRETURNai8) {
1967     MBBI = MBB.getLastNonDebugInstr();
1968     MachineOperand &JumpTarget = MBBI->getOperand(0);
1969     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1970   }
1971 }
1972 
1973 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1974                                             BitVector &SavedRegs,
1975                                             RegScavenger *RS) const {
1976   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1977 
1978   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1979 
1980   // Do not explicitly save the callee saved VSRp registers.
1981   // The individual VSR subregisters will be saved instead.
1982   SavedRegs.reset(PPC::VSRp26);
1983   SavedRegs.reset(PPC::VSRp27);
1984   SavedRegs.reset(PPC::VSRp28);
1985   SavedRegs.reset(PPC::VSRp29);
1986   SavedRegs.reset(PPC::VSRp30);
1987   SavedRegs.reset(PPC::VSRp31);
1988 
1989   //  Save and clear the LR state.
1990   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1991   unsigned LR = RegInfo->getRARegister();
1992   FI->setMustSaveLR(MustSaveLR(MF, LR));
1993   SavedRegs.reset(LR);
1994 
1995   //  Save R31 if necessary
1996   int FPSI = FI->getFramePointerSaveIndex();
1997   const bool isPPC64 = Subtarget.isPPC64();
1998   MachineFrameInfo &MFI = MF.getFrameInfo();
1999 
2000   // If the frame pointer save index hasn't been defined yet.
2001   if (!FPSI && needsFP(MF)) {
2002     // Find out what the fix offset of the frame pointer save area.
2003     int FPOffset = getFramePointerSaveOffset();
2004     // Allocate the frame index for frame pointer save area.
2005     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2006     // Save the result.
2007     FI->setFramePointerSaveIndex(FPSI);
2008   }
2009 
2010   int BPSI = FI->getBasePointerSaveIndex();
2011   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2012     int BPOffset = getBasePointerSaveOffset();
2013     // Allocate the frame index for the base pointer save area.
2014     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2015     // Save the result.
2016     FI->setBasePointerSaveIndex(BPSI);
2017   }
2018 
2019   // Reserve stack space for the PIC Base register (R30).
2020   // Only used in SVR4 32-bit.
2021   if (FI->usesPICBase()) {
2022     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2023     FI->setPICBasePointerSaveIndex(PBPSI);
2024   }
2025 
2026   // Make sure we don't explicitly spill r31, because, for example, we have
2027   // some inline asm which explicitly clobbers it, when we otherwise have a
2028   // frame pointer and are using r31's spill slot for the prologue/epilogue
2029   // code. Same goes for the base pointer and the PIC base register.
2030   if (needsFP(MF))
2031     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2032   if (RegInfo->hasBasePointer(MF))
2033     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2034   if (FI->usesPICBase())
2035     SavedRegs.reset(PPC::R30);
2036 
2037   // Reserve stack space to move the linkage area to in case of a tail call.
2038   int TCSPDelta = 0;
2039   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2040       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2041     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2042   }
2043 
2044   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2045   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2046   // object at the offset of the CR-save slot in the linkage area. The actual
2047   // save and restore of the condition register will be created as part of the
2048   // prologue and epilogue insertion, but the FixedStack object is needed to
2049   // keep the CalleSavedInfo valid.
2050   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2051        SavedRegs.test(PPC::CR4))) {
2052     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2053     const int64_t SpillOffset =
2054         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2055     int FrameIdx =
2056         MFI.CreateFixedObject(SpillSize, SpillOffset,
2057                               /* IsImmutable */ true, /* IsAliased */ false);
2058     FI->setCRSpillFrameIndex(FrameIdx);
2059   }
2060 }
2061 
2062 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2063                                                        RegScavenger *RS) const {
2064   // Get callee saved register information.
2065   MachineFrameInfo &MFI = MF.getFrameInfo();
2066   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2067 
2068   // If the function is shrink-wrapped, and if the function has a tail call, the
2069   // tail call might not be in the new RestoreBlock, so real branch instruction
2070   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2071   // RestoreBlock. So we handle this case here.
2072   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2073     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2074     for (MachineBasicBlock &MBB : MF) {
2075       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2076         createTailCallBranchInstr(MBB);
2077     }
2078   }
2079 
2080   // Early exit if no callee saved registers are modified!
2081   if (CSI.empty() && !needsFP(MF)) {
2082     addScavengingSpillSlot(MF, RS);
2083     return;
2084   }
2085 
2086   unsigned MinGPR = PPC::R31;
2087   unsigned MinG8R = PPC::X31;
2088   unsigned MinFPR = PPC::F31;
2089   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2090 
2091   bool HasGPSaveArea = false;
2092   bool HasG8SaveArea = false;
2093   bool HasFPSaveArea = false;
2094   bool HasVRSaveArea = false;
2095 
2096   SmallVector<CalleeSavedInfo, 18> GPRegs;
2097   SmallVector<CalleeSavedInfo, 18> G8Regs;
2098   SmallVector<CalleeSavedInfo, 18> FPRegs;
2099   SmallVector<CalleeSavedInfo, 18> VRegs;
2100 
2101   for (const CalleeSavedInfo &I : CSI) {
2102     Register Reg = I.getReg();
2103     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2104             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2105            "Not expecting to try to spill R2 in a function that must save TOC");
2106     if (PPC::GPRCRegClass.contains(Reg)) {
2107       HasGPSaveArea = true;
2108 
2109       GPRegs.push_back(I);
2110 
2111       if (Reg < MinGPR) {
2112         MinGPR = Reg;
2113       }
2114     } else if (PPC::G8RCRegClass.contains(Reg)) {
2115       HasG8SaveArea = true;
2116 
2117       G8Regs.push_back(I);
2118 
2119       if (Reg < MinG8R) {
2120         MinG8R = Reg;
2121       }
2122     } else if (PPC::F8RCRegClass.contains(Reg)) {
2123       HasFPSaveArea = true;
2124 
2125       FPRegs.push_back(I);
2126 
2127       if (Reg < MinFPR) {
2128         MinFPR = Reg;
2129       }
2130     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2131                PPC::CRRCRegClass.contains(Reg)) {
2132       ; // do nothing, as we already know whether CRs are spilled
2133     } else if (PPC::VRRCRegClass.contains(Reg) ||
2134                PPC::SPERCRegClass.contains(Reg)) {
2135       // Altivec and SPE are mutually exclusive, but have the same stack
2136       // alignment requirements, so overload the save area for both cases.
2137       HasVRSaveArea = true;
2138 
2139       VRegs.push_back(I);
2140 
2141       if (Reg < MinVR) {
2142         MinVR = Reg;
2143       }
2144     } else {
2145       llvm_unreachable("Unknown RegisterClass!");
2146     }
2147   }
2148 
2149   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2150   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2151 
2152   int64_t LowerBound = 0;
2153 
2154   // Take into account stack space reserved for tail calls.
2155   int TCSPDelta = 0;
2156   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2157       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2158     LowerBound = TCSPDelta;
2159   }
2160 
2161   // The Floating-point register save area is right below the back chain word
2162   // of the previous stack frame.
2163   if (HasFPSaveArea) {
2164     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2165       int FI = FPRegs[i].getFrameIdx();
2166 
2167       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2168     }
2169 
2170     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2171   }
2172 
2173   // Check whether the frame pointer register is allocated. If so, make sure it
2174   // is spilled to the correct offset.
2175   if (needsFP(MF)) {
2176     int FI = PFI->getFramePointerSaveIndex();
2177     assert(FI && "No Frame Pointer Save Slot!");
2178     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2179     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2180     HasGPSaveArea = true;
2181   }
2182 
2183   if (PFI->usesPICBase()) {
2184     int FI = PFI->getPICBasePointerSaveIndex();
2185     assert(FI && "No PIC Base Pointer Save Slot!");
2186     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2187 
2188     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2189     HasGPSaveArea = true;
2190   }
2191 
2192   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2193   if (RegInfo->hasBasePointer(MF)) {
2194     int FI = PFI->getBasePointerSaveIndex();
2195     assert(FI && "No Base Pointer Save Slot!");
2196     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2197 
2198     Register BP = RegInfo->getBaseRegister(MF);
2199     if (PPC::G8RCRegClass.contains(BP)) {
2200       MinG8R = std::min<unsigned>(MinG8R, BP);
2201       HasG8SaveArea = true;
2202     } else if (PPC::GPRCRegClass.contains(BP)) {
2203       MinGPR = std::min<unsigned>(MinGPR, BP);
2204       HasGPSaveArea = true;
2205     }
2206   }
2207 
2208   // General register save area starts right below the Floating-point
2209   // register save area.
2210   if (HasGPSaveArea || HasG8SaveArea) {
2211     // Move general register save area spill slots down, taking into account
2212     // the size of the Floating-point register save area.
2213     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2214       if (!GPRegs[i].isSpilledToReg()) {
2215         int FI = GPRegs[i].getFrameIdx();
2216         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2217       }
2218     }
2219 
2220     // Move general register save area spill slots down, taking into account
2221     // the size of the Floating-point register save area.
2222     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2223       if (!G8Regs[i].isSpilledToReg()) {
2224         int FI = G8Regs[i].getFrameIdx();
2225         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2226       }
2227     }
2228 
2229     unsigned MinReg =
2230       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2231                          TRI->getEncodingValue(MinG8R));
2232 
2233     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2234     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2235   }
2236 
2237   // For 32-bit only, the CR save area is below the general register
2238   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2239   // to the stack pointer and hence does not need an adjustment here.
2240   // Only CR2 (the first nonvolatile spilled) has an associated frame
2241   // index so that we have a single uniform save area.
2242   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2243     // Adjust the frame index of the CR spill slot.
2244     for (const auto &CSInfo : CSI) {
2245       if (CSInfo.getReg() == PPC::CR2) {
2246         int FI = CSInfo.getFrameIdx();
2247         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2248         break;
2249       }
2250     }
2251 
2252     LowerBound -= 4; // The CR save area is always 4 bytes long.
2253   }
2254 
2255   // Both Altivec and SPE have the same alignment and padding requirements
2256   // within the stack frame.
2257   if (HasVRSaveArea) {
2258     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2259     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2260     // we are using negative number here (the stack grows downward). We should
2261     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2262     // is the alignment size ( n = 16 here) and y is the size after aligning.
2263     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2264     LowerBound &= ~(15);
2265 
2266     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2267       int FI = VRegs[i].getFrameIdx();
2268 
2269       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2270     }
2271   }
2272 
2273   addScavengingSpillSlot(MF, RS);
2274 }
2275 
2276 void
2277 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2278                                          RegScavenger *RS) const {
2279   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2280   // a large stack, which will require scavenging a register to materialize a
2281   // large offset.
2282 
2283   // We need to have a scavenger spill slot for spills if the frame size is
2284   // large. In case there is no free register for large-offset addressing,
2285   // this slot is used for the necessary emergency spill. Also, we need the
2286   // slot for dynamic stack allocations.
2287 
2288   // The scavenger might be invoked if the frame offset does not fit into
2289   // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2290   // We don't know the complete frame size here because we've not yet computed
2291   // callee-saved register spills or the needed alignment padding.
2292   unsigned StackSize = determineFrameLayout(MF, true);
2293   MachineFrameInfo &MFI = MF.getFrameInfo();
2294   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2295 
2296   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2297       (hasSpills(MF) && NeedSpills)) {
2298     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2299     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2300     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2301     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2302     unsigned Size = TRI.getSpillSize(RC);
2303     Align Alignment = TRI.getSpillAlign(RC);
2304     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2305 
2306     // Might we have over-aligned allocas?
2307     bool HasAlVars =
2308         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2309 
2310     // These kinds of spills might need two registers.
2311     if (spillsCR(MF) || HasAlVars)
2312       RS->addScavengingFrameIndex(
2313           MFI.CreateStackObject(Size, Alignment, false));
2314   }
2315 }
2316 
2317 // This function checks if a callee saved gpr can be spilled to a volatile
2318 // vector register. This occurs for leaf functions when the option
2319 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2320 // which were not spilled to vectors, return false so the target independent
2321 // code can handle them by assigning a FrameIdx to a stack slot.
2322 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2323     MachineFunction &MF, const TargetRegisterInfo *TRI,
2324     std::vector<CalleeSavedInfo> &CSI) const {
2325 
2326   if (CSI.empty())
2327     return true; // Early exit if no callee saved registers are modified!
2328 
2329   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2330   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2331   const MachineRegisterInfo &MRI = MF.getRegInfo();
2332 
2333   if (Subtarget.hasSPE()) {
2334     // In case of SPE we only have SuperRegs and CRs
2335     // in our CalleSaveInfo vector.
2336 
2337     for (auto &CalleeSaveReg : CSI) {
2338       MCPhysReg Reg = CalleeSaveReg.getReg();
2339       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2340       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2341 
2342       if ( // Check only for SuperRegs.
2343           Lower &&
2344           // Replace Reg if only lower-32 bits modified
2345           !MRI.isPhysRegModified(Higher))
2346         CalleeSaveReg = CalleeSavedInfo(Lower);
2347     }
2348   }
2349 
2350   // Early exit if cannot spill gprs to volatile vector registers.
2351   MachineFrameInfo &MFI = MF.getFrameInfo();
2352   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2353     return false;
2354 
2355   // Build a BitVector of VSRs that can be used for spilling GPRs.
2356   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2357   BitVector BVCalleeSaved(TRI->getNumRegs());
2358   for (unsigned i = 0; CSRegs[i]; ++i)
2359     BVCalleeSaved.set(CSRegs[i]);
2360 
2361   for (unsigned Reg : BVAllocatable.set_bits()) {
2362     // Set to 0 if the register is not a volatile VSX register, or if it is
2363     // used in the function.
2364     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2365         MRI.isPhysRegUsed(Reg))
2366       BVAllocatable.reset(Reg);
2367   }
2368 
2369   bool AllSpilledToReg = true;
2370   unsigned LastVSRUsedForSpill = 0;
2371   for (auto &CS : CSI) {
2372     if (BVAllocatable.none())
2373       return false;
2374 
2375     Register Reg = CS.getReg();
2376 
2377     if (!PPC::G8RCRegClass.contains(Reg)) {
2378       AllSpilledToReg = false;
2379       continue;
2380     }
2381 
2382     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2383     // into one VSR using the mtvsrdd instruction.
2384     if (LastVSRUsedForSpill != 0) {
2385       CS.setDstReg(LastVSRUsedForSpill);
2386       BVAllocatable.reset(LastVSRUsedForSpill);
2387       LastVSRUsedForSpill = 0;
2388       continue;
2389     }
2390 
2391     unsigned VolatileVFReg = BVAllocatable.find_first();
2392     if (VolatileVFReg < BVAllocatable.size()) {
2393       CS.setDstReg(VolatileVFReg);
2394       LastVSRUsedForSpill = VolatileVFReg;
2395     } else {
2396       AllSpilledToReg = false;
2397     }
2398   }
2399   return AllSpilledToReg;
2400 }
2401 
2402 bool PPCFrameLowering::spillCalleeSavedRegisters(
2403     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2404     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2405 
2406   MachineFunction *MF = MBB.getParent();
2407   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2408   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2409   bool MustSaveTOC = FI->mustSaveTOC();
2410   DebugLoc DL;
2411   bool CRSpilled = false;
2412   MachineInstrBuilder CRMIB;
2413   BitVector Spilled(TRI->getNumRegs());
2414 
2415   VSRContainingGPRs.clear();
2416 
2417   // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
2418   // or two GPRs, so we need table to record information for later save/restore.
2419   for (const CalleeSavedInfo &Info : CSI) {
2420     if (Info.isSpilledToReg()) {
2421       auto &SpilledVSR =
2422           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2423       assert(SpilledVSR.second == 0 &&
2424              "Can't spill more than two GPRs into VSR!");
2425       if (SpilledVSR.first == 0)
2426         SpilledVSR.first = Info.getReg();
2427       else
2428         SpilledVSR.second = Info.getReg();
2429     }
2430   }
2431 
2432   for (const CalleeSavedInfo &I : CSI) {
2433     Register Reg = I.getReg();
2434 
2435     // CR2 through CR4 are the nonvolatile CR fields.
2436     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2437 
2438     // Add the callee-saved register as live-in; it's killed at the spill.
2439     // Do not do this for callee-saved registers that are live-in to the
2440     // function because they will already be marked live-in and this will be
2441     // adding it for a second time. It is an error to add the same register
2442     // to the set more than once.
2443     const MachineRegisterInfo &MRI = MF->getRegInfo();
2444     bool IsLiveIn = MRI.isLiveIn(Reg);
2445     if (!IsLiveIn)
2446        MBB.addLiveIn(Reg);
2447 
2448     if (CRSpilled && IsCRField) {
2449       CRMIB.addReg(Reg, RegState::ImplicitKill);
2450       continue;
2451     }
2452 
2453     // The actual spill will happen in the prologue.
2454     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2455       continue;
2456 
2457     // Insert the spill to the stack frame.
2458     if (IsCRField) {
2459       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2460       if (!Subtarget.is32BitELFABI()) {
2461         // The actual spill will happen at the start of the prologue.
2462         FuncInfo->addMustSaveCR(Reg);
2463       } else {
2464         CRSpilled = true;
2465         FuncInfo->setSpillsCR();
2466 
2467         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2468         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2469         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2470                   .addReg(Reg, RegState::ImplicitKill);
2471 
2472         MBB.insert(MI, CRMIB);
2473         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2474                                          .addReg(PPC::R12,
2475                                                  getKillRegState(true)),
2476                                          I.getFrameIdx()));
2477       }
2478     } else {
2479       if (I.isSpilledToReg()) {
2480         unsigned Dst = I.getDstReg();
2481 
2482         if (Spilled[Dst])
2483           continue;
2484 
2485         if (VSRContainingGPRs[Dst].second != 0) {
2486           assert(Subtarget.hasP9Vector() &&
2487                  "mtvsrdd is unavailable on pre-P9 targets.");
2488 
2489           NumPESpillVSR += 2;
2490           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2491               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2492               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2493         } else if (VSRContainingGPRs[Dst].second == 0) {
2494           assert(Subtarget.hasP8Vector() &&
2495                  "Can't move GPR to VSR on pre-P8 targets.");
2496 
2497           ++NumPESpillVSR;
2498           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2499                   TRI->getSubReg(Dst, PPC::sub_64))
2500               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2501         } else {
2502           llvm_unreachable("More than two GPRs spilled to a VSR!");
2503         }
2504         Spilled.set(Dst);
2505       } else {
2506         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2507         // Use !IsLiveIn for the kill flag.
2508         // We do not want to kill registers that are live in this function
2509         // before their use because they will become undefined registers.
2510         // Functions without NoUnwind need to preserve the order of elements in
2511         // saved vector registers.
2512         if (Subtarget.needsSwapsForVSXMemOps() &&
2513             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2514           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2515                                        I.getFrameIdx(), RC, TRI);
2516         else
2517           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2518                                   TRI, Register());
2519       }
2520     }
2521   }
2522   return true;
2523 }
2524 
2525 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2526                        bool CR4Spilled, MachineBasicBlock &MBB,
2527                        MachineBasicBlock::iterator MI,
2528                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2529 
2530   MachineFunction *MF = MBB.getParent();
2531   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2532   DebugLoc DL;
2533   unsigned MoveReg = PPC::R12;
2534 
2535   // 32-bit:  FP-relative
2536   MBB.insert(MI,
2537              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2538                                CSI[CSIIndex].getFrameIdx()));
2539 
2540   unsigned RestoreOp = PPC::MTOCRF;
2541   if (CR2Spilled)
2542     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2543                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2544 
2545   if (CR3Spilled)
2546     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2547                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2548 
2549   if (CR4Spilled)
2550     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2551                .addReg(MoveReg, getKillRegState(true)));
2552 }
2553 
2554 MachineBasicBlock::iterator PPCFrameLowering::
2555 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2556                               MachineBasicBlock::iterator I) const {
2557   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2558   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2559       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2560     // Add (actually subtract) back the amount the callee popped on return.
2561     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2562       bool is64Bit = Subtarget.isPPC64();
2563       CalleeAmt *= -1;
2564       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2565       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2566       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2567       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2568       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2569       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2570       const DebugLoc &dl = I->getDebugLoc();
2571 
2572       if (isInt<16>(CalleeAmt)) {
2573         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2574           .addReg(StackReg, RegState::Kill)
2575           .addImm(CalleeAmt);
2576       } else {
2577         MachineBasicBlock::iterator MBBI = I;
2578         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2579           .addImm(CalleeAmt >> 16);
2580         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2581           .addReg(TmpReg, RegState::Kill)
2582           .addImm(CalleeAmt & 0xFFFF);
2583         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2584           .addReg(StackReg, RegState::Kill)
2585           .addReg(TmpReg);
2586       }
2587     }
2588   }
2589   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2590   return MBB.erase(I);
2591 }
2592 
2593 static bool isCalleeSavedCR(unsigned Reg) {
2594   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2595 }
2596 
2597 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2598     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2599     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2600   MachineFunction *MF = MBB.getParent();
2601   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2602   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2603   bool MustSaveTOC = FI->mustSaveTOC();
2604   bool CR2Spilled = false;
2605   bool CR3Spilled = false;
2606   bool CR4Spilled = false;
2607   unsigned CSIIndex = 0;
2608   BitVector Restored(TRI->getNumRegs());
2609 
2610   // Initialize insertion-point logic; we will be restoring in reverse
2611   // order of spill.
2612   MachineBasicBlock::iterator I = MI, BeforeI = I;
2613   bool AtStart = I == MBB.begin();
2614 
2615   if (!AtStart)
2616     --BeforeI;
2617 
2618   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2619     Register Reg = CSI[i].getReg();
2620 
2621     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2622       continue;
2623 
2624     // Restore of callee saved condition register field is handled during
2625     // epilogue insertion.
2626     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2627       continue;
2628 
2629     if (Reg == PPC::CR2) {
2630       CR2Spilled = true;
2631       // The spill slot is associated only with CR2, which is the
2632       // first nonvolatile spilled.  Save it here.
2633       CSIIndex = i;
2634       continue;
2635     } else if (Reg == PPC::CR3) {
2636       CR3Spilled = true;
2637       continue;
2638     } else if (Reg == PPC::CR4) {
2639       CR4Spilled = true;
2640       continue;
2641     } else {
2642       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2643       // least one CR register, restore all spilled CRs together.
2644       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2645         bool is31 = needsFP(*MF);
2646         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2647                    CSIIndex);
2648         CR2Spilled = CR3Spilled = CR4Spilled = false;
2649       }
2650 
2651       if (CSI[i].isSpilledToReg()) {
2652         DebugLoc DL;
2653         unsigned Dst = CSI[i].getDstReg();
2654 
2655         if (Restored[Dst])
2656           continue;
2657 
2658         if (VSRContainingGPRs[Dst].second != 0) {
2659           assert(Subtarget.hasP9Vector());
2660           NumPEReloadVSR += 2;
2661           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2662                   VSRContainingGPRs[Dst].second)
2663               .addReg(Dst);
2664           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2665                   VSRContainingGPRs[Dst].first)
2666               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2667         } else if (VSRContainingGPRs[Dst].second == 0) {
2668           assert(Subtarget.hasP8Vector());
2669           ++NumPEReloadVSR;
2670           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2671                   VSRContainingGPRs[Dst].first)
2672               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2673         } else {
2674           llvm_unreachable("More than two GPRs spilled to a VSR!");
2675         }
2676 
2677         Restored.set(Dst);
2678 
2679       } else {
2680        // Default behavior for non-CR saves.
2681         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2682 
2683         // Functions without NoUnwind need to preserve the order of elements in
2684         // saved vector registers.
2685         if (Subtarget.needsSwapsForVSXMemOps() &&
2686             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2687           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2688                                         TRI);
2689         else
2690           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2691                                    Register());
2692 
2693         assert(I != MBB.begin() &&
2694                "loadRegFromStackSlot didn't insert any code!");
2695       }
2696     }
2697 
2698     // Insert in reverse order.
2699     if (AtStart)
2700       I = MBB.begin();
2701     else {
2702       I = BeforeI;
2703       ++I;
2704     }
2705   }
2706 
2707   // If we haven't yet spilled the CRs, do so now.
2708   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2709     assert(Subtarget.is32BitELFABI() &&
2710            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2711     bool is31 = needsFP(*MF);
2712     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2713   }
2714 
2715   return true;
2716 }
2717 
2718 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2719   return TOCSaveOffset;
2720 }
2721 
2722 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2723   return FramePointerSaveOffset;
2724 }
2725 
2726 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2727   return BasePointerSaveOffset;
2728 }
2729 
2730 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2731   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2732     return false;
2733   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2734 }
2735 
2736 uint64_t PPCFrameLowering::getStackThreshold() const {
2737   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2738   // use `add r1, r1, <scratch_reg>` to release the stack frame.
2739   // Scratch register contains a signed 64-bit number, which is negative
2740   // when extending the stack and is positive when releasing the stack frame.
2741   // To make `stux` and `add` paired, the absolute value of the number contained
2742   // in the scratch register should be the same. Thus the maximum stack size
2743   // is (2^63)-1, i.e., LONG_MAX.
2744   if (Subtarget.isPPC64())
2745     return LONG_MAX;
2746 
2747   return TargetFrameLowering::getStackThreshold();
2748 }
2749