1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "framelowering"
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36 STATISTIC(NumPrologProbed, "Number of prologues probed");
37 
38 static cl::opt<bool>
39 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
40                      cl::desc("Enable spills in prologue to vector registers."),
41                      cl::init(false), cl::Hidden);
42 
computeReturnSaveOffset(const PPCSubtarget & STI)43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
computeTOCSaveOffset(const PPCSubtarget & STI)50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
computeFramePointerSaveOffset(const PPCSubtarget & STI)56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
computeLinkageSize(const PPCSubtarget & STI)61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
computeBasePointerSaveOffset(const PPCSubtarget & STI)69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
computeCRSaveOffset(const PPCSubtarget & STI)78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
PPCFrameLowering(const PPCSubtarget & STI)82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
getCalleeSavedSpillSlots(unsigned & NumEntries) const93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
spillsCR(const MachineFunction & MF)252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
hasSpills(const MachineFunction & MF)257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
hasNonRISpills(const MachineFunction & MF)262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
MustSaveLR(const MachineFunction & MF,unsigned LR)270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
determineFrameLayoutAndUpdate(MachineFunction & MF,bool UseEstimate) const284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
determineFrameLayout(const MachineFunction & MF,bool UseEstimate,unsigned * NewMaxCallFrameSize) const297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF); // No special alignment.
321 
322   // Note: for PPC32 SVR4ABI, we can still generate stackless
323   // code if all local vars are reg-allocated.
324   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
325 
326   // Check whether we can skip adjusting the stack pointer (by using red zone)
327   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
328     // No need for frame
329     return 0;
330   }
331 
332   // Get the maximum call frame size of all the calls.
333   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
334 
335   // Maximum call frame needs to be at least big enough for linkage area.
336   unsigned minCallFrameSize = getLinkageSize();
337   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
338 
339   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
340   // that allocations will be aligned.
341   if (MFI.hasVarSizedObjects())
342     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
343 
344   // Update the new max call frame size if the caller passes in a valid pointer.
345   if (NewMaxCallFrameSize)
346     *NewMaxCallFrameSize = maxCallFrameSize;
347 
348   // Include call frame size in total.
349   FrameSize += maxCallFrameSize;
350 
351   // Make sure the frame is aligned.
352   FrameSize = alignTo(FrameSize, Alignment);
353 
354   return FrameSize;
355 }
356 
357 // hasFP - Return true if the specified function actually has a dedicated frame
358 // pointer register.
hasFP(const MachineFunction & MF) const359 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
360   const MachineFrameInfo &MFI = MF.getFrameInfo();
361   // FIXME: This is pretty much broken by design: hasFP() might be called really
362   // early, before the stack layout was calculated and thus hasFP() might return
363   // true or false here depending on the time of call.
364   return (MFI.getStackSize()) && needsFP(MF);
365 }
366 
367 // needsFP - Return true if the specified function should have a dedicated frame
368 // pointer register.  This is true if the function has variable sized allocas or
369 // if frame pointer elimination is disabled.
needsFP(const MachineFunction & MF) const370 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
371   const MachineFrameInfo &MFI = MF.getFrameInfo();
372 
373   // Naked functions have no stack frame pushed, so we don't have a frame
374   // pointer.
375   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
376     return false;
377 
378   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
379          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
380          MF.exposesReturnsTwice() ||
381          (MF.getTarget().Options.GuaranteedTailCallOpt &&
382           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
383 }
384 
replaceFPWithRealFP(MachineFunction & MF) const385 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
386   bool is31 = needsFP(MF);
387   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
388   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
389 
390   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
391   bool HasBP = RegInfo->hasBasePointer(MF);
392   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
393   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
394 
395   for (MachineBasicBlock &MBB : MF)
396     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
397       --MBBI;
398       for (MachineOperand &MO : MBBI->operands()) {
399         if (!MO.isReg())
400           continue;
401 
402         switch (MO.getReg()) {
403         case PPC::FP:
404           MO.setReg(FPReg);
405           break;
406         case PPC::FP8:
407           MO.setReg(FP8Reg);
408           break;
409         case PPC::BP:
410           MO.setReg(BPReg);
411           break;
412         case PPC::BP8:
413           MO.setReg(BP8Reg);
414           break;
415 
416         }
417       }
418     }
419 }
420 
421 /*  This function will do the following:
422     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
423       respectively (defaults recommended by the ABI) and return true
424     - If MBB is not an entry block, initialize the register scavenger and look
425       for available registers.
426     - If the defaults (R0/R12) are available, return true
427     - If TwoUniqueRegsRequired is set to true, it looks for two unique
428       registers. Otherwise, look for a single available register.
429       - If the required registers are found, set SR1 and SR2 and return true.
430       - If the required registers are not found, set SR2 or both SR1 and SR2 to
431         PPC::NoRegister and return false.
432 
433     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
434     is not set, this function will attempt to find two different registers, but
435     still return true if only one register is available (and set SR1 == SR2).
436 */
437 bool
findScratchRegister(MachineBasicBlock * MBB,bool UseAtEnd,bool TwoUniqueRegsRequired,Register * SR1,Register * SR2) const438 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
439                                       bool UseAtEnd,
440                                       bool TwoUniqueRegsRequired,
441                                       Register *SR1,
442                                       Register *SR2) const {
443   RegScavenger RS;
444   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
445   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
446 
447   // Set the defaults for the two scratch registers.
448   if (SR1)
449     *SR1 = R0;
450 
451   if (SR2) {
452     assert (SR1 && "Asking for the second scratch register but not the first?");
453     *SR2 = R12;
454   }
455 
456   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
457   if ((UseAtEnd && MBB->isReturnBlock()) ||
458       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
459     return true;
460 
461   if (UseAtEnd) {
462     // The scratch register will be used before the first terminator (or at the
463     // end of the block if there are no terminators).
464     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
465     if (MBBI == MBB->begin()) {
466       RS.enterBasicBlock(*MBB);
467     } else {
468       RS.enterBasicBlockEnd(*MBB);
469       RS.backward(MBBI);
470     }
471   } else {
472     // The scratch register will be used at the start of the block.
473     RS.enterBasicBlock(*MBB);
474   }
475 
476   // If the two registers are available, we're all good.
477   // Note that we only return here if both R0 and R12 are available because
478   // although the function may not require two unique registers, it may benefit
479   // from having two so we should try to provide them.
480   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
481     return true;
482 
483   // Get the list of callee-saved registers for the target.
484   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
485   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
486 
487   // Get all the available registers in the block.
488   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
489                                      &PPC::GPRCRegClass);
490 
491   // We shouldn't use callee-saved registers as scratch registers as they may be
492   // available when looking for a candidate block for shrink wrapping but not
493   // available when the actual prologue/epilogue is being emitted because they
494   // were added as live-in to the prologue block by PrologueEpilogueInserter.
495   for (int i = 0; CSRegs[i]; ++i)
496     BV.reset(CSRegs[i]);
497 
498   // Set the first scratch register to the first available one.
499   if (SR1) {
500     int FirstScratchReg = BV.find_first();
501     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
502   }
503 
504   // If there is another one available, set the second scratch register to that.
505   // Otherwise, set it to either PPC::NoRegister if this function requires two
506   // or to whatever SR1 is set to if this function doesn't require two.
507   if (SR2) {
508     int SecondScratchReg = BV.find_next(*SR1);
509     if (SecondScratchReg != -1)
510       *SR2 = SecondScratchReg;
511     else
512       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
513   }
514 
515   // Now that we've done our best to provide both registers, double check
516   // whether we were unable to provide enough.
517   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
518     return false;
519 
520   return true;
521 }
522 
523 // We need a scratch register for spilling LR and for spilling CR. By default,
524 // we use two scratch registers to hide latency. However, if only one scratch
525 // register is available, we can adjust for that by not overlapping the spill
526 // code. However, if we need to realign the stack (i.e. have a base pointer)
527 // and the stack frame is large, we need two scratch registers.
528 // Also, stack probe requires two scratch registers, one for old sp, one for
529 // large frame and large probe size.
530 bool
twoUniqueScratchRegsRequired(MachineBasicBlock * MBB) const531 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
532   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
533   MachineFunction &MF = *(MBB->getParent());
534   bool HasBP = RegInfo->hasBasePointer(MF);
535   unsigned FrameSize = determineFrameLayout(MF);
536   int NegFrameSize = -FrameSize;
537   bool IsLargeFrame = !isInt<16>(NegFrameSize);
538   MachineFrameInfo &MFI = MF.getFrameInfo();
539   Align MaxAlign = MFI.getMaxAlign();
540   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
541   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
542 
543   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
544          TLI.hasInlineStackProbe(MF);
545 }
546 
canUseAsPrologue(const MachineBasicBlock & MBB) const547 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
548   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
549 
550   return findScratchRegister(TmpMBB, false,
551                              twoUniqueScratchRegsRequired(TmpMBB));
552 }
553 
canUseAsEpilogue(const MachineBasicBlock & MBB) const554 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
555   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
556 
557   return findScratchRegister(TmpMBB, true);
558 }
559 
stackUpdateCanBeMoved(MachineFunction & MF) const560 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
561   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
562   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
563 
564   // Abort if there is no register info or function info.
565   if (!RegInfo || !FI)
566     return false;
567 
568   // Only move the stack update on ELFv2 ABI and PPC64.
569   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
570     return false;
571 
572   // Check the frame size first and return false if it does not fit the
573   // requirements.
574   // We need a non-zero frame size as well as a frame that will fit in the red
575   // zone. This is because by moving the stack pointer update we are now storing
576   // to the red zone until the stack pointer is updated. If we get an interrupt
577   // inside the prologue but before the stack update we now have a number of
578   // stores to the red zone and those stores must all fit.
579   MachineFrameInfo &MFI = MF.getFrameInfo();
580   unsigned FrameSize = MFI.getStackSize();
581   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
582     return false;
583 
584   // Frame pointers and base pointers complicate matters so don't do anything
585   // if we have them. For example having a frame pointer will sometimes require
586   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
587   // difficult. Similar situation exists with setjmp.
588   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
589     return false;
590 
591   // Calls to fast_cc functions use different rules for passing parameters on
592   // the stack from the ABI and using PIC base in the function imposes
593   // similar restrictions to using the base pointer. It is not generally safe
594   // to move the stack pointer update in these situations.
595   if (FI->hasFastCall() || FI->usesPICBase())
596     return false;
597 
598   // Finally we can move the stack update if we do not require register
599   // scavenging. Register scavenging can introduce more spills and so
600   // may make the frame size larger than we have computed.
601   return !RegInfo->requiresFrameIndexScavenging(MF);
602 }
603 
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const604 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
605                                     MachineBasicBlock &MBB) const {
606   MachineBasicBlock::iterator MBBI = MBB.begin();
607   MachineFrameInfo &MFI = MF.getFrameInfo();
608   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
609   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
610   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
611 
612   MachineModuleInfo &MMI = MF.getMMI();
613   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
614   DebugLoc dl;
615   // AIX assembler does not support cfi directives.
616   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
617 
618   const bool HasFastMFLR = Subtarget.hasFastMFLR();
619 
620   // Get processor type.
621   bool isPPC64 = Subtarget.isPPC64();
622   // Get the ABI.
623   bool isSVR4ABI = Subtarget.isSVR4ABI();
624   bool isELFv2ABI = Subtarget.isELFv2ABI();
625   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
626 
627   // Work out frame sizes.
628   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
629   int64_t NegFrameSize = -FrameSize;
630   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
631     llvm_unreachable("Unhandled stack size!");
632 
633   if (MFI.isFrameAddressTaken())
634     replaceFPWithRealFP(MF);
635 
636   // Check if the link register (LR) must be saved.
637   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
638   bool MustSaveLR = FI->mustSaveLR();
639   bool MustSaveTOC = FI->mustSaveTOC();
640   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
641   bool MustSaveCR = !MustSaveCRs.empty();
642   // Do we have a frame pointer and/or base pointer for this function?
643   bool HasFP = hasFP(MF);
644   bool HasBP = RegInfo->hasBasePointer(MF);
645   bool HasRedZone = isPPC64 || !isSVR4ABI;
646   bool HasROPProtect = Subtarget.hasROPProtect();
647   bool HasPrivileged = Subtarget.hasPrivileged();
648 
649   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
650   Register BPReg = RegInfo->getBaseRegister(MF);
651   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
652   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
653   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
654   Register ScratchReg;
655   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
656   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
657   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
658                                                 : PPC::MFLR );
659   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
660                                                  : PPC::STW );
661   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
662                                                      : PPC::STWU );
663   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
664                                                         : PPC::STWUX);
665   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
666                                               : PPC::OR );
667   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
668                                                             : PPC::SUBFC);
669   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
670                                                                : PPC::SUBFIC);
671   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
672                                                            : PPC::MFCR);
673   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
674   const MCInstrDesc &HashST =
675       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
676                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
677 
678   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
679   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
680   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
681   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
682   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
683          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
684 
685   // Using the same bool variable as below to suppress compiler warnings.
686   bool SingleScratchReg = findScratchRegister(
687       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
688   assert(SingleScratchReg &&
689          "Required number of registers not available in this block");
690 
691   SingleScratchReg = ScratchReg == TempReg;
692 
693   int64_t LROffset = getReturnSaveOffset();
694 
695   int64_t FPOffset = 0;
696   if (HasFP) {
697     MachineFrameInfo &MFI = MF.getFrameInfo();
698     int FPIndex = FI->getFramePointerSaveIndex();
699     assert(FPIndex && "No Frame Pointer Save Slot!");
700     FPOffset = MFI.getObjectOffset(FPIndex);
701   }
702 
703   int64_t BPOffset = 0;
704   if (HasBP) {
705     MachineFrameInfo &MFI = MF.getFrameInfo();
706     int BPIndex = FI->getBasePointerSaveIndex();
707     assert(BPIndex && "No Base Pointer Save Slot!");
708     BPOffset = MFI.getObjectOffset(BPIndex);
709   }
710 
711   int64_t PBPOffset = 0;
712   if (FI->usesPICBase()) {
713     MachineFrameInfo &MFI = MF.getFrameInfo();
714     int PBPIndex = FI->getPICBasePointerSaveIndex();
715     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
716     PBPOffset = MFI.getObjectOffset(PBPIndex);
717   }
718 
719   // Get stack alignments.
720   Align MaxAlign = MFI.getMaxAlign();
721   if (HasBP && MaxAlign > 1)
722     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
723 
724   // Frames of 32KB & larger require special handling because they cannot be
725   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
726   bool isLargeFrame = !isInt<16>(NegFrameSize);
727 
728   // Check if we can move the stack update instruction (stdu) down the prologue
729   // past the callee saves. Hopefully this will avoid the situation where the
730   // saves are waiting for the update on the store with update to complete.
731   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
732   bool MovingStackUpdateDown = false;
733 
734   // Check if we can move the stack update.
735   if (stackUpdateCanBeMoved(MF)) {
736     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
737     for (CalleeSavedInfo CSI : Info) {
738       // If the callee saved register is spilled to a register instead of the
739       // stack then the spill no longer uses the stack pointer.
740       // This can lead to two consequences:
741       // 1) We no longer need to update the stack because the function does not
742       //    spill any callee saved registers to stack.
743       // 2) We have a situation where we still have to update the stack pointer
744       //    even though some registers are spilled to other registers. In
745       //    this case the current code moves the stack update to an incorrect
746       //    position.
747       // In either case we should abort moving the stack update operation.
748       if (CSI.isSpilledToReg()) {
749         StackUpdateLoc = MBBI;
750         MovingStackUpdateDown = false;
751         break;
752       }
753 
754       int FrIdx = CSI.getFrameIdx();
755       // If the frame index is not negative the callee saved info belongs to a
756       // stack object that is not a fixed stack object. We ignore non-fixed
757       // stack objects because we won't move the stack update pointer past them.
758       if (FrIdx >= 0)
759         continue;
760 
761       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
762         StackUpdateLoc++;
763         MovingStackUpdateDown = true;
764       } else {
765         // We need all of the Frame Indices to meet these conditions.
766         // If they do not, abort the whole operation.
767         StackUpdateLoc = MBBI;
768         MovingStackUpdateDown = false;
769         break;
770       }
771     }
772 
773     // If the operation was not aborted then update the object offset.
774     if (MovingStackUpdateDown) {
775       for (CalleeSavedInfo CSI : Info) {
776         int FrIdx = CSI.getFrameIdx();
777         if (FrIdx < 0)
778           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
779       }
780     }
781   }
782 
783   // Where in the prologue we move the CR fields depends on how many scratch
784   // registers we have, and if we need to save the link register or not. This
785   // lambda is to avoid duplicating the logic in 2 places.
786   auto BuildMoveFromCR = [&]() {
787     if (isELFv2ABI && MustSaveCRs.size() == 1) {
788     // In the ELFv2 ABI, we are not required to save all CR fields.
789     // If only one CR field is clobbered, it is more efficient to use
790     // mfocrf to selectively save just that field, because mfocrf has short
791     // latency compares to mfcr.
792       assert(isPPC64 && "V2 ABI is 64-bit only.");
793       MachineInstrBuilder MIB =
794           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
795       MIB.addReg(MustSaveCRs[0], RegState::Kill);
796     } else {
797       MachineInstrBuilder MIB =
798           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
799       for (unsigned CRfield : MustSaveCRs)
800         MIB.addReg(CRfield, RegState::ImplicitKill);
801     }
802   };
803 
804   // If we need to spill the CR and the LR but we don't have two separate
805   // registers available, we must spill them one at a time
806   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
807     BuildMoveFromCR();
808     BuildMI(MBB, MBBI, dl, StoreWordInst)
809         .addReg(TempReg, getKillRegState(true))
810         .addImm(CRSaveOffset)
811         .addReg(SPReg);
812   }
813 
814   if (MustSaveLR)
815     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
816 
817   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
818     BuildMoveFromCR();
819 
820   if (HasRedZone) {
821     if (HasFP)
822       BuildMI(MBB, MBBI, dl, StoreInst)
823         .addReg(FPReg)
824         .addImm(FPOffset)
825         .addReg(SPReg);
826     if (FI->usesPICBase())
827       BuildMI(MBB, MBBI, dl, StoreInst)
828         .addReg(PPC::R30)
829         .addImm(PBPOffset)
830         .addReg(SPReg);
831     if (HasBP)
832       BuildMI(MBB, MBBI, dl, StoreInst)
833         .addReg(BPReg)
834         .addImm(BPOffset)
835         .addReg(SPReg);
836   }
837 
838   // Generate the instruction to store the LR. In the case where ROP protection
839   // is required the register holding the LR should not be killed as it will be
840   // used by the hash store instruction.
841   auto SaveLR = [&](int64_t Offset) {
842     assert(MustSaveLR && "LR is not required to be saved!");
843     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
844         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
845         .addImm(Offset)
846         .addReg(SPReg);
847 
848     // Add the ROP protection Hash Store instruction.
849     // NOTE: This is technically a violation of the ABI. The hash can be saved
850     // up to 512 bytes into the Protected Zone. This can be outside of the
851     // initial 288 byte volatile program storage region in the Protected Zone.
852     // However, this restriction will be removed in an upcoming revision of the
853     // ABI.
854     if (HasROPProtect) {
855       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
856       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
857       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
858              "ROP hash save offset out of range.");
859       assert(((ImmOffset & 0x7) == 0) &&
860              "ROP hash save offset must be 8 byte aligned.");
861       BuildMI(MBB, StackUpdateLoc, dl, HashST)
862           .addReg(ScratchReg, getKillRegState(true))
863           .addImm(ImmOffset)
864           .addReg(SPReg);
865     }
866   };
867 
868   if (MustSaveLR && HasFastMFLR)
869       SaveLR(LROffset);
870 
871   if (MustSaveCR &&
872       !(SingleScratchReg && MustSaveLR)) {
873     assert(HasRedZone && "A red zone is always available on PPC64");
874     BuildMI(MBB, MBBI, dl, StoreWordInst)
875       .addReg(TempReg, getKillRegState(true))
876       .addImm(CRSaveOffset)
877       .addReg(SPReg);
878   }
879 
880   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
881   if (!FrameSize) {
882     if (MustSaveLR && !HasFastMFLR)
883       SaveLR(LROffset);
884     return;
885   }
886 
887   // Adjust stack pointer: r1 += NegFrameSize.
888   // If there is a preferred stack alignment, align R1 now
889 
890   if (HasBP && HasRedZone) {
891     // Save a copy of r1 as the base pointer.
892     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
893       .addReg(SPReg)
894       .addReg(SPReg);
895   }
896 
897   // Have we generated a STUX instruction to claim stack frame? If so,
898   // the negated frame size will be placed in ScratchReg.
899   bool HasSTUX =
900       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
901       (HasBP && MaxAlign > 1) || isLargeFrame;
902 
903   // If we use STUX to update the stack pointer, we need the two scratch
904   // registers TempReg and ScratchReg, we have to save LR here which is stored
905   // in ScratchReg.
906   // If the offset can not be encoded into the store instruction, we also have
907   // to save LR here.
908   if (MustSaveLR && !HasFastMFLR &&
909       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
910     SaveLR(LROffset);
911 
912   // If FrameSize <= TLI.getStackProbeSize(MF), no explicit probe is needed: the
913   // POWER ABI requires the back-chain pointer to always be stored at SP, so the
914   // mandatory STU(X) that claims the frame already acts as a free probe.
915   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
916     // To be consistent with other targets, a pseudo instruction is emitted and
917     // will be later expanded in `inlineStackProbe`.
918     BuildMI(MBB, MBBI, dl,
919             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
920                             : PPC::PROBED_STACKALLOC_32))
921         .addDef(TempReg)
922         .addDef(ScratchReg) // ScratchReg stores the old sp.
923         .addImm(NegFrameSize);
924     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
925     // update the ScratchReg to meet the assumption that ScratchReg contains
926     // the NegFrameSize. This solution is rather tricky.
927     if (!HasRedZone) {
928       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
929           .addReg(ScratchReg)
930           .addReg(SPReg);
931     }
932   } else {
933     // This condition must be kept in sync with canUseAsPrologue.
934     if (HasBP && MaxAlign > 1) {
935       if (isPPC64)
936         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
937             .addReg(SPReg)
938             .addImm(0)
939             .addImm(64 - Log2(MaxAlign));
940       else // PPC32...
941         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
942             .addReg(SPReg)
943             .addImm(0)
944             .addImm(32 - Log2(MaxAlign))
945             .addImm(31);
946       if (!isLargeFrame) {
947         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
948             .addReg(ScratchReg, RegState::Kill)
949             .addImm(NegFrameSize);
950       } else {
951         assert(!SingleScratchReg && "Only a single scratch reg available");
952         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
953         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
954             .addReg(ScratchReg, RegState::Kill)
955             .addReg(TempReg, RegState::Kill);
956       }
957 
958       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
959           .addReg(SPReg, RegState::Kill)
960           .addReg(SPReg)
961           .addReg(ScratchReg);
962     } else if (!isLargeFrame) {
963       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
964           .addReg(SPReg)
965           .addImm(NegFrameSize)
966           .addReg(SPReg);
967     } else {
968       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
969       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
970           .addReg(SPReg, RegState::Kill)
971           .addReg(SPReg)
972           .addReg(ScratchReg);
973     }
974   }
975 
976   // Save the TOC register after the stack pointer update if a prologue TOC
977   // save is required for the function.
978   if (MustSaveTOC) {
979     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
980     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
981       .addReg(TOCReg, getKillRegState(true))
982       .addImm(TOCSaveOffset)
983       .addReg(SPReg);
984   }
985 
986   if (!HasRedZone) {
987     assert(!isPPC64 && "A red zone is always available on PPC64");
988     if (HasSTUX) {
989       // The negated frame size is in ScratchReg, and the SPReg has been
990       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
991       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
992       // the stack frame (i.e. the old SP), ideally, we would put the old
993       // SP into a register and use it as the base for the stores. The
994       // problem is that the only available register may be ScratchReg,
995       // which could be R0, and R0 cannot be used as a base address.
996 
997       // First, set ScratchReg to the old SP. This may need to be modified
998       // later.
999       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1000         .addReg(ScratchReg, RegState::Kill)
1001         .addReg(SPReg);
1002 
1003       if (ScratchReg == PPC::R0) {
1004         // R0 cannot be used as a base register, but it can be used as an
1005         // index in a store-indexed.
1006         int LastOffset = 0;
1007         if (HasFP)  {
1008           // R0 += (FPOffset-LastOffset).
1009           // Need addic, since addi treats R0 as 0.
1010           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1011             .addReg(ScratchReg)
1012             .addImm(FPOffset-LastOffset);
1013           LastOffset = FPOffset;
1014           // Store FP into *R0.
1015           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1016             .addReg(FPReg, RegState::Kill)  // Save FP.
1017             .addReg(PPC::ZERO)
1018             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1019         }
1020         if (FI->usesPICBase()) {
1021           // R0 += (PBPOffset-LastOffset).
1022           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1023             .addReg(ScratchReg)
1024             .addImm(PBPOffset-LastOffset);
1025           LastOffset = PBPOffset;
1026           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1027             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1028             .addReg(PPC::ZERO)
1029             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1030         }
1031         if (HasBP) {
1032           // R0 += (BPOffset-LastOffset).
1033           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1034             .addReg(ScratchReg)
1035             .addImm(BPOffset-LastOffset);
1036           LastOffset = BPOffset;
1037           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1038             .addReg(BPReg, RegState::Kill)  // Save BP.
1039             .addReg(PPC::ZERO)
1040             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1041           // BP = R0-LastOffset
1042           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1043             .addReg(ScratchReg, RegState::Kill)
1044             .addImm(-LastOffset);
1045         }
1046       } else {
1047         // ScratchReg is not R0, so use it as the base register. It is
1048         // already set to the old SP, so we can use the offsets directly.
1049 
1050         // Now that the stack frame has been allocated, save all the necessary
1051         // registers using ScratchReg as the base address.
1052         if (HasFP)
1053           BuildMI(MBB, MBBI, dl, StoreInst)
1054             .addReg(FPReg)
1055             .addImm(FPOffset)
1056             .addReg(ScratchReg);
1057         if (FI->usesPICBase())
1058           BuildMI(MBB, MBBI, dl, StoreInst)
1059             .addReg(PPC::R30)
1060             .addImm(PBPOffset)
1061             .addReg(ScratchReg);
1062         if (HasBP) {
1063           BuildMI(MBB, MBBI, dl, StoreInst)
1064             .addReg(BPReg)
1065             .addImm(BPOffset)
1066             .addReg(ScratchReg);
1067           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1068             .addReg(ScratchReg, RegState::Kill)
1069             .addReg(ScratchReg);
1070         }
1071       }
1072     } else {
1073       // The frame size is a known 16-bit constant (fitting in the immediate
1074       // field of STWU). To be here we have to be compiling for PPC32.
1075       // Since the SPReg has been decreased by FrameSize, add it back to each
1076       // offset.
1077       if (HasFP)
1078         BuildMI(MBB, MBBI, dl, StoreInst)
1079           .addReg(FPReg)
1080           .addImm(FrameSize + FPOffset)
1081           .addReg(SPReg);
1082       if (FI->usesPICBase())
1083         BuildMI(MBB, MBBI, dl, StoreInst)
1084           .addReg(PPC::R30)
1085           .addImm(FrameSize + PBPOffset)
1086           .addReg(SPReg);
1087       if (HasBP) {
1088         BuildMI(MBB, MBBI, dl, StoreInst)
1089           .addReg(BPReg)
1090           .addImm(FrameSize + BPOffset)
1091           .addReg(SPReg);
1092         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1093           .addReg(SPReg)
1094           .addImm(FrameSize);
1095       }
1096     }
1097   }
1098 
1099   // Save the LR now.
1100   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1101     SaveLR(LROffset + FrameSize);
1102 
1103   // Add Call Frame Information for the instructions we generated above.
1104   if (needsCFI) {
1105     unsigned CFIIndex;
1106 
1107     if (HasBP) {
1108       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1109       // because if the stack needed aligning then CFA won't be at a fixed
1110       // offset from FP/SP.
1111       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1112       CFIIndex = MF.addFrameInst(
1113           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1114     } else {
1115       // Adjust the definition of CFA to account for the change in SP.
1116       assert(NegFrameSize);
1117       CFIIndex = MF.addFrameInst(
1118           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1119     }
1120     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1121         .addCFIIndex(CFIIndex);
1122 
1123     if (HasFP) {
1124       // Describe where FP was saved, at a fixed offset from CFA.
1125       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1126       CFIIndex = MF.addFrameInst(
1127           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1128       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1129           .addCFIIndex(CFIIndex);
1130     }
1131 
1132     if (FI->usesPICBase()) {
1133       // Describe where R30 (PIC base) was saved, at a fixed offset from CFA.
1134       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1135       CFIIndex = MF.addFrameInst(
1136           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1137       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138           .addCFIIndex(CFIIndex);
1139     }
1140 
1141     if (HasBP) {
1142       // Describe where BP was saved, at a fixed offset from CFA.
1143       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1144       CFIIndex = MF.addFrameInst(
1145           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1146       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1147           .addCFIIndex(CFIIndex);
1148     }
1149 
1150     if (MustSaveLR) {
1151       // Describe where LR was saved, at a fixed offset from CFA.
1152       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1153       CFIIndex = MF.addFrameInst(
1154           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1155       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1156           .addCFIIndex(CFIIndex);
1157     }
1158   }
1159 
1160   // If there is a frame pointer, copy R1 into R31
1161   if (HasFP) {
1162     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1163       .addReg(SPReg)
1164       .addReg(SPReg);
1165 
1166     if (!HasBP && needsCFI) {
1167       // Change the definition of CFA from SP+offset to FP+offset, because SP
1168       // will change at every alloca.
1169       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1170       unsigned CFIIndex = MF.addFrameInst(
1171           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1172 
1173       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1174           .addCFIIndex(CFIIndex);
1175     }
1176   }
1177 
1178   if (needsCFI) {
1179     // Describe where callee saved registers were saved, at fixed offsets from
1180     // CFA.
1181     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1182     for (const CalleeSavedInfo &I : CSI) {
1183       Register Reg = I.getReg();
1184       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1185 
1186       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1187       // subregisters of CR2. We just need to emit a move of CR2.
1188       if (PPC::CRBITRCRegClass.contains(Reg))
1189         continue;
1190 
1191       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1192         continue;
1193 
1194       // For 64-bit SVR4 when we have spilled CRs, the spill location
1195       // is SP+8, not a frame-relative slot.
1196       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1197         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1198         // the whole CR word.  In the ELFv2 ABI, every CR that was
1199         // actually saved gets its own CFI record.
1200         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1201         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1202             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1203         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1204             .addCFIIndex(CFIIndex);
1205         continue;
1206       }
1207 
1208       if (I.isSpilledToReg()) {
1209         unsigned SpilledReg = I.getDstReg();
1210         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1211             nullptr, MRI->getDwarfRegNum(Reg, true),
1212             MRI->getDwarfRegNum(SpilledReg, true)));
1213         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1214           .addCFIIndex(CFIRegister);
1215       } else {
1216         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1217         // We have changed the object offset above but we do not want to change
1218         // the actual offsets in the CFI instruction so we have to undo the
1219         // offset change here.
1220         if (MovingStackUpdateDown)
1221           Offset -= NegFrameSize;
1222 
1223         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1224             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1225         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1226             .addCFIIndex(CFIIndex);
1227       }
1228     }
1229   }
1230 }
1231 
inlineStackProbe(MachineFunction & MF,MachineBasicBlock & PrologMBB) const1232 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1233                                         MachineBasicBlock &PrologMBB) const {
1234   bool isPPC64 = Subtarget.isPPC64();
1235   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1236   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1237   MachineFrameInfo &MFI = MF.getFrameInfo();
1238   MachineModuleInfo &MMI = MF.getMMI();
1239   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1240   // AIX assembler does not support cfi directives.
1241   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1242   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1243     int Opc = MI.getOpcode();
1244     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1245   });
1246   if (StackAllocMIPos == PrologMBB.end())
1247     return;
1248   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1249   MachineBasicBlock *CurrentMBB = &PrologMBB;
1250   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1251   MachineInstr &MI = *StackAllocMIPos;
1252   int64_t NegFrameSize = MI.getOperand(2).getImm();
1253   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1254   int64_t NegProbeSize = -(int64_t)ProbeSize;
1255   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1256   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1257   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1258   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1259   Register ScratchReg = MI.getOperand(0).getReg();
1260   Register FPReg = MI.getOperand(1).getReg();
1261   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1262   bool HasBP = RegInfo->hasBasePointer(MF);
1263   Register BPReg = RegInfo->getBaseRegister(MF);
1264   Align MaxAlign = MFI.getMaxAlign();
1265   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1266   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1267   // Subroutines to generate .cfi_* directives.
1268   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1269                             MachineBasicBlock::iterator MBBI, Register Reg) {
1270     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1271     unsigned CFIIndex = MF.addFrameInst(
1272         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1273     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1274         .addCFIIndex(CFIIndex);
1275   };
1276   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1277                          MachineBasicBlock::iterator MBBI, Register Reg,
1278                          int Offset) {
1279     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1280     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1281         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1282     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1283         .addCFIIndex(CFIIndex);
1284   };
1285   // Subroutine to determine if we can use the Imm as part of d-form.
1286   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1287   // Subroutine to materialize the Imm into TempReg.
1288   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1289                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1290                             Register &TempReg) {
1291     assert(isInt<32>(Imm) && "Unhandled imm");
1292     if (isInt<16>(Imm))
1293       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1294           .addImm(Imm);
1295     else {
1296       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1297           .addImm(Imm >> 16);
1298       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1299           .addReg(TempReg)
1300           .addImm(Imm & 0xFFFF);
1301     }
1302   };
1303   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1304   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1305                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1306                               Register NegSizeReg, bool UseDForm,
1307                               Register StoreReg) {
1308     if (UseDForm)
1309       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1310           .addReg(StoreReg)
1311           .addImm(NegSize)
1312           .addReg(SPReg);
1313     else
1314       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1315           .addReg(StoreReg)
1316           .addReg(SPReg)
1317           .addReg(NegSizeReg);
1318   };
1319   // Used to probe stack when realignment is required.
1320   // Note that, according to ABI's requirement, *sp must always equals the
1321   // value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
1322   // Following is pseudo code:
1323   // final_sp = (sp & align) + negframesize;
1324   // neg_gap = final_sp - sp;
1325   // while (neg_gap < negprobesize) {
1326   //   stdu fp, negprobesize(sp);
1327   //   neg_gap -= negprobesize;
1328   // }
1329   // stdux fp, sp, neg_gap
1330   //
1331   // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
1332   // before probe code, we don't need to save it, so we get one additional reg
1333   // that can be used to materialize the probeside if needed to use xform.
1334   // Otherwise, we can NOT materialize probeside, so we can only use Dform for
1335   // now.
1336   //
1337   // The allocations are:
1338   // if (HasBP && HasRedzone) {
1339   //   r0: materialize the probesize if needed so that we can use xform.
1340   //   r12: `neg_gap`
1341   // } else {
1342   //   r0: back-chain pointer
1343   //   r12: `neg_gap`.
1344   // }
1345   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1346                                  MachineBasicBlock::iterator MBBI,
1347                                  Register ScratchReg, Register TempReg) {
1348     assert(HasBP && "The function is supposed to have base pointer when its "
1349                     "stack is realigned.");
1350     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1351 
1352     // FIXME: We can eliminate this limitation if we get more infomation about
1353     // which part of redzone are already used. Used redzone can be treated
1354     // probed. But there might be `holes' in redzone probed, this could
1355     // complicate the implementation.
1356     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1357            "Probe size should be larger or equal to the size of red-zone so "
1358            "that red-zone is not clobbered by probing.");
1359 
1360     Register &FinalStackPtr = TempReg;
1361     // FIXME: We only support NegProbeSize materializable by DForm currently.
1362     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1363     // register.
1364     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1365     assert(isInt<16>(NegProbeSize) &&
1366            "NegProbeSize should be materializable by DForm");
1367     Register CRReg = PPC::CR0;
1368     // Layout of output assembly kinda like:
1369     // bb.0:
1370     //   ...
1371     //   sub $scratchreg, $finalsp, r1
1372     //   cmpdi $scratchreg, <negprobesize>
1373     //   bge bb.2
1374     // bb.1:
1375     //   stdu <backchain>, <negprobesize>(r1)
1376     //   sub $scratchreg, $scratchreg, negprobesize
1377     //   cmpdi $scratchreg, <negprobesize>
1378     //   blt bb.1
1379     // bb.2:
1380     //   stdux <backchain>, r1, $scratchreg
1381     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1382     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1383     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1384     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1385     MF.insert(MBBInsertPoint, ProbeExitMBB);
1386     // bb.2
1387     {
1388       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1389       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1390                        BackChainPointer);
1391       if (HasRedZone)
1392         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1393         // to TempReg to satisfy it.
1394         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1395             .addReg(BPReg)
1396             .addReg(BPReg);
1397       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1398       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1399     }
1400     // bb.0
1401     {
1402       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1403           .addReg(SPReg)
1404           .addReg(FinalStackPtr);
1405       if (!HasRedZone)
1406         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1407       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1408           .addReg(ScratchReg)
1409           .addImm(NegProbeSize);
1410       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1411           .addImm(PPC::PRED_GE)
1412           .addReg(CRReg)
1413           .addMBB(ProbeExitMBB);
1414       MBB.addSuccessor(ProbeLoopBodyMBB);
1415       MBB.addSuccessor(ProbeExitMBB);
1416     }
1417     // bb.1
1418     {
1419       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1420       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1421                        0, true /*UseDForm*/, BackChainPointer);
1422       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1423               ScratchReg)
1424           .addReg(ScratchReg)
1425           .addImm(-NegProbeSize);
1426       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1427               CRReg)
1428           .addReg(ScratchReg)
1429           .addImm(NegProbeSize);
1430       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1431           .addImm(PPC::PRED_LT)
1432           .addReg(CRReg)
1433           .addMBB(ProbeLoopBodyMBB);
1434       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1435       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1436     }
1437     // Update liveins.
1438     bool anyChange = false;
1439     do {
1440       anyChange = recomputeLiveIns(*ProbeExitMBB) ||
1441                   recomputeLiveIns(*ProbeLoopBodyMBB);
1442     } while (anyChange);
1443     return ProbeExitMBB;
1444   };
1445   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1446   // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
1447   // the offset subtracted from SP is determined by SP's runtime value.
1448   if (HasBP && MaxAlign > 1) {
1449     // Calculate final stack pointer.
1450     if (isPPC64)
1451       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1452           .addReg(SPReg)
1453           .addImm(0)
1454           .addImm(64 - Log2(MaxAlign));
1455     else
1456       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1457           .addReg(SPReg)
1458           .addImm(0)
1459           .addImm(32 - Log2(MaxAlign))
1460           .addImm(31);
1461     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1462             FPReg)
1463         .addReg(ScratchReg)
1464         .addReg(SPReg);
1465     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1466     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1467             FPReg)
1468         .addReg(ScratchReg)
1469         .addReg(FPReg);
1470     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1471     if (needsCFI)
1472       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1473   } else {
1474     // Initialize current frame pointer.
1475     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1476     // Use FPReg to calculate CFA.
1477     if (needsCFI)
1478       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1479     // Probe residual part.
1480     if (NegResidualSize) {
1481       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1482       if (!ResidualUseDForm)
1483         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1484       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1485                        ResidualUseDForm, FPReg);
1486     }
1487     bool UseDForm = CanUseDForm(NegProbeSize);
1488     // If number of blocks is small, just probe them directly.
1489     if (NumBlocks < 3) {
1490       if (!UseDForm)
1491         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1492       for (int i = 0; i < NumBlocks; ++i)
1493         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1494                          FPReg);
1495       if (needsCFI) {
1496         // Restore using SPReg to calculate CFA.
1497         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1498       }
1499     } else {
1500       // Since CTR is a volatile register and current shrinkwrap implementation
1501       // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1502       // CTR loop to probe.
1503       // Calculate trip count and stores it in CTRReg.
1504       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1505       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1506           .addReg(ScratchReg, RegState::Kill);
1507       if (!UseDForm)
1508         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1509       // Create MBBs of the loop.
1510       MachineFunction::iterator MBBInsertPoint =
1511           std::next(CurrentMBB->getIterator());
1512       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1513       MF.insert(MBBInsertPoint, LoopMBB);
1514       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1515       MF.insert(MBBInsertPoint, ExitMBB);
1516       // Synthesize the loop body.
1517       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1518                        UseDForm, FPReg);
1519       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1520           .addMBB(LoopMBB);
1521       LoopMBB->addSuccessor(ExitMBB);
1522       LoopMBB->addSuccessor(LoopMBB);
1523       // Synthesize the exit MBB.
1524       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1525                       std::next(MachineBasicBlock::iterator(MI)),
1526                       CurrentMBB->end());
1527       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1528       CurrentMBB->addSuccessor(LoopMBB);
1529       if (needsCFI) {
1530         // Restore using SPReg to calculate CFA.
1531         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1532       }
1533       // Update liveins.
1534       bool anyChange = false;
1535       do {
1536         anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
1537       } while (anyChange);
1538     }
1539   }
1540   ++NumPrologProbed;
1541   MI.eraseFromParent();
1542 }
1543 
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1544 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1545                                     MachineBasicBlock &MBB) const {
1546   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1547   DebugLoc dl;
1548 
1549   if (MBBI != MBB.end())
1550     dl = MBBI->getDebugLoc();
1551 
1552   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1553   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1554 
1555   // Get alignment info so we know how to restore the SP.
1556   const MachineFrameInfo &MFI = MF.getFrameInfo();
1557 
1558   // Get the number of bytes allocated from the FrameInfo.
1559   int64_t FrameSize = MFI.getStackSize();
1560 
1561   // Get processor type.
1562   bool isPPC64 = Subtarget.isPPC64();
1563 
1564   // Check if the link register (LR) has been saved.
1565   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1566   bool MustSaveLR = FI->mustSaveLR();
1567   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1568   bool MustSaveCR = !MustSaveCRs.empty();
1569   // Do we have a frame pointer and/or base pointer for this function?
1570   bool HasFP = hasFP(MF);
1571   bool HasBP = RegInfo->hasBasePointer(MF);
1572   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1573   bool HasROPProtect = Subtarget.hasROPProtect();
1574   bool HasPrivileged = Subtarget.hasPrivileged();
1575 
1576   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1577   Register BPReg = RegInfo->getBaseRegister(MF);
1578   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1579   Register ScratchReg;
1580   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1581   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1582                                                  : PPC::MTLR );
1583   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1584                                                  : PPC::LWZ );
1585   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1586                                                            : PPC::LIS );
1587   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1588                                               : PPC::OR );
1589   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1590                                                   : PPC::ORI );
1591   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1592                                                    : PPC::ADDI );
1593   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1594                                                 : PPC::ADD4 );
1595   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1596                                                      : PPC::LWZ);
1597   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1598                                                      : PPC::MTOCRF);
1599   const MCInstrDesc &HashChk =
1600       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1601                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1602   int64_t LROffset = getReturnSaveOffset();
1603 
1604   int64_t FPOffset = 0;
1605 
1606   // Using the same bool variable as below to suppress compiler warnings.
1607   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1608                                               &TempReg);
1609   assert(SingleScratchReg &&
1610          "Could not find an available scratch register");
1611 
1612   SingleScratchReg = ScratchReg == TempReg;
1613 
1614   if (HasFP) {
1615     int FPIndex = FI->getFramePointerSaveIndex();
1616     assert(FPIndex && "No Frame Pointer Save Slot!");
1617     FPOffset = MFI.getObjectOffset(FPIndex);
1618   }
1619 
1620   int64_t BPOffset = 0;
1621   if (HasBP) {
1622       int BPIndex = FI->getBasePointerSaveIndex();
1623       assert(BPIndex && "No Base Pointer Save Slot!");
1624       BPOffset = MFI.getObjectOffset(BPIndex);
1625   }
1626 
1627   int64_t PBPOffset = 0;
1628   if (FI->usesPICBase()) {
1629     int PBPIndex = FI->getPICBasePointerSaveIndex();
1630     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1631     PBPOffset = MFI.getObjectOffset(PBPIndex);
1632   }
1633 
1634   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1635 
1636   if (IsReturnBlock) {
1637     unsigned RetOpcode = MBBI->getOpcode();
1638     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1639                       RetOpcode == PPC::TCRETURNdi ||
1640                       RetOpcode == PPC::TCRETURNai ||
1641                       RetOpcode == PPC::TCRETURNri8 ||
1642                       RetOpcode == PPC::TCRETURNdi8 ||
1643                       RetOpcode == PPC::TCRETURNai8;
1644 
1645     if (UsesTCRet) {
1646       int MaxTCRetDelta = FI->getTailCallSPDelta();
1647       MachineOperand &StackAdjust = MBBI->getOperand(1);
1648       assert(StackAdjust.isImm() && "Expecting immediate value.");
1649       // Adjust stack pointer.
1650       int StackAdj = StackAdjust.getImm();
1651       int Delta = StackAdj - MaxTCRetDelta;
1652       assert((Delta >= 0) && "Delta must be positive");
1653       if (MaxTCRetDelta>0)
1654         FrameSize += (StackAdj +Delta);
1655       else
1656         FrameSize += StackAdj;
1657     }
1658   }
1659 
1660   // Frames of 32KB & larger require special handling because they cannot be
1661   // indexed into with a simple LD/LWZ immediate offset operand.
1662   bool isLargeFrame = !isInt<16>(FrameSize);
1663 
1664   // On targets without red zone, the SP needs to be restored last, so that
1665   // all live contents of the stack frame are upwards of the SP. This means
1666   // that we cannot restore SP just now, since there may be more registers
1667   // to restore from the stack frame (e.g. R31). If the frame size is not
1668   // a simple immediate value, we will need a spare register to hold the
1669   // restored SP. If the frame size is known and small, we can simply adjust
1670   // the offsets of the registers to be restored, and still use SP to restore
1671   // them. In such case, the final update of SP will be to add the frame
1672   // size to it.
1673   // To simplify the code, set RBReg to the base register used to restore
1674   // values from the stack, and set SPAdd to the value that needs to be added
1675   // to the SP at the end. The default values are as if red zone was present.
1676   unsigned RBReg = SPReg;
1677   uint64_t SPAdd = 0;
1678 
1679   // Check if we can move the stack update instruction up the epilogue
1680   // past the callee saves. This will allow the move to LR instruction
1681   // to be executed before the restores of the callee saves which means
1682   // that the callee saves can hide the latency from the MTLR instrcution.
1683   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1684   if (stackUpdateCanBeMoved(MF)) {
1685     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1686     for (CalleeSavedInfo CSI : Info) {
1687       // If the callee saved register is spilled to another register abort the
1688       // stack update movement.
1689       if (CSI.isSpilledToReg()) {
1690         StackUpdateLoc = MBBI;
1691         break;
1692       }
1693       int FrIdx = CSI.getFrameIdx();
1694       // If the frame index is not negative the callee saved info belongs to a
1695       // stack object that is not a fixed stack object. We ignore non-fixed
1696       // stack objects because we won't move the update of the stack pointer
1697       // past them.
1698       if (FrIdx >= 0)
1699         continue;
1700 
1701       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1702         StackUpdateLoc--;
1703       else {
1704         // Abort the operation as we can't update all CSR restores.
1705         StackUpdateLoc = MBBI;
1706         break;
1707       }
1708     }
1709   }
1710 
1711   if (FrameSize) {
1712     // In the prologue, the loaded (or persistent) stack pointer value is
1713     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1714     // zone add this offset back now.
1715 
1716     // If the function has a base pointer, the stack pointer has been copied
1717     // to it so we can restore it by copying in the other direction.
1718     if (HasRedZone && HasBP) {
1719       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1720         addReg(BPReg).
1721         addReg(BPReg);
1722     }
1723     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1724     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1725     // call which invalidates the stack pointer value in SP(0). So we use the
1726     // value of R31 in this case. Similar situation exists with setjmp.
1727     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1728       assert(HasFP && "Expecting a valid frame pointer.");
1729       if (!HasRedZone)
1730         RBReg = FPReg;
1731       if (!isLargeFrame) {
1732         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1733           .addReg(FPReg).addImm(FrameSize);
1734       } else {
1735         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1736         BuildMI(MBB, MBBI, dl, AddInst)
1737           .addReg(RBReg)
1738           .addReg(FPReg)
1739           .addReg(ScratchReg);
1740       }
1741     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1742       if (HasRedZone) {
1743         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1744           .addReg(SPReg)
1745           .addImm(FrameSize);
1746       } else {
1747         // Make sure that adding FrameSize will not overflow the max offset
1748         // size.
1749         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1750                "Local offsets should be negative");
1751         SPAdd = FrameSize;
1752         FPOffset += FrameSize;
1753         BPOffset += FrameSize;
1754         PBPOffset += FrameSize;
1755       }
1756     } else {
1757       // We don't want to use ScratchReg as a base register, because it
1758       // could happen to be R0. Use FP instead, but make sure to preserve it.
1759       if (!HasRedZone) {
1760         // If FP is not saved, copy it to ScratchReg.
1761         if (!HasFP)
1762           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1763             .addReg(FPReg)
1764             .addReg(FPReg);
1765         RBReg = FPReg;
1766       }
1767       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1768         .addImm(0)
1769         .addReg(SPReg);
1770     }
1771   }
1772   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1773   // If there is no red zone, ScratchReg may be needed for holding a useful
1774   // value (although not the base register). Make sure it is not overwritten
1775   // too early.
1776 
1777   // If we need to restore both the LR and the CR and we only have one
1778   // available scratch register, we must do them one at a time.
1779   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1780     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1781     // is live here.
1782     assert(HasRedZone && "Expecting red zone");
1783     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1784       .addImm(CRSaveOffset)
1785       .addReg(SPReg);
1786     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1787       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1788         .addReg(TempReg, getKillRegState(i == e-1));
1789   }
1790 
1791   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1792   // LR is stored in the caller's stack frame. ScratchReg will be needed
1793   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1794   // a base register anyway, because it may happen to be R0.
1795   bool LoadedLR = false;
1796   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1797     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1798       .addImm(LROffset+SPAdd)
1799       .addReg(RBReg);
1800     LoadedLR = true;
1801   }
1802 
1803   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1804     assert(RBReg == SPReg && "Should be using SP as a base register");
1805     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1806       .addImm(CRSaveOffset)
1807       .addReg(RBReg);
1808   }
1809 
1810   if (HasFP) {
1811     // If there is red zone, restore FP directly, since SP has already been
1812     // restored. Otherwise, restore the value of FP into ScratchReg.
1813     if (HasRedZone || RBReg == SPReg)
1814       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1815         .addImm(FPOffset)
1816         .addReg(SPReg);
1817     else
1818       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1819         .addImm(FPOffset)
1820         .addReg(RBReg);
1821   }
1822 
1823   if (FI->usesPICBase())
1824     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1825       .addImm(PBPOffset)
1826       .addReg(RBReg);
1827 
1828   if (HasBP)
1829     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1830       .addImm(BPOffset)
1831       .addReg(RBReg);
1832 
1833   // There is nothing more to be loaded from the stack, so now we can
1834   // restore SP: SP = RBReg + SPAdd.
1835   if (RBReg != SPReg || SPAdd != 0) {
1836     assert(!HasRedZone && "This should not happen with red zone");
1837     // If SPAdd is 0, generate a copy.
1838     if (SPAdd == 0)
1839       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1840         .addReg(RBReg)
1841         .addReg(RBReg);
1842     else
1843       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1844         .addReg(RBReg)
1845         .addImm(SPAdd);
1846 
1847     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1848     if (RBReg == FPReg)
1849       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1850         .addReg(ScratchReg)
1851         .addReg(ScratchReg);
1852 
1853     // Now load the LR from the caller's stack frame.
1854     if (MustSaveLR && !LoadedLR)
1855       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1856         .addImm(LROffset)
1857         .addReg(SPReg);
1858   }
1859 
1860   if (MustSaveCR &&
1861       !(SingleScratchReg && MustSaveLR))
1862     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1863       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1864         .addReg(TempReg, getKillRegState(i == e-1));
1865 
1866   if (MustSaveLR) {
1867     // If ROP protection is required, an extra instruction is added to compute a
1868     // hash and then compare it to the hash stored in the prologue.
1869     if (HasROPProtect) {
1870       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1871       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1872       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1873              "ROP hash check location offset out of range.");
1874       assert(((ImmOffset & 0x7) == 0) &&
1875              "ROP hash check location offset must be 8 byte aligned.");
1876       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1877           .addReg(ScratchReg)
1878           .addImm(ImmOffset)
1879           .addReg(SPReg);
1880     }
1881     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1882   }
1883 
1884   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1885   // call optimization
1886   if (IsReturnBlock) {
1887     unsigned RetOpcode = MBBI->getOpcode();
1888     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1889         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1890         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1891       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1892       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1893 
1894       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1895         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1896           .addReg(SPReg).addImm(CallerAllocatedAmt);
1897       } else {
1898         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1899           .addImm(CallerAllocatedAmt >> 16);
1900         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1901           .addReg(ScratchReg, RegState::Kill)
1902           .addImm(CallerAllocatedAmt & 0xFFFF);
1903         BuildMI(MBB, MBBI, dl, AddInst)
1904           .addReg(SPReg)
1905           .addReg(FPReg)
1906           .addReg(ScratchReg);
1907       }
1908     } else {
1909       createTailCallBranchInstr(MBB);
1910     }
1911   }
1912 }
1913 
createTailCallBranchInstr(MachineBasicBlock & MBB) const1914 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1915   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1916 
1917   // If we got this far a first terminator should exist.
1918   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1919 
1920   DebugLoc dl = MBBI->getDebugLoc();
1921   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1922 
1923   // Create branch instruction for pseudo tail call return instruction.
1924   // The TCRETURNdi variants are direct calls. Valid targets for those are
1925   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1926   // since we can tail call external functions with PC-Rel (i.e. we don't need
1927   // to worry about different TOC pointers). Some of the external functions will
1928   // be MO_GlobalAddress while others like memcpy for example, are going to
1929   // be MO_ExternalSymbol.
1930   unsigned RetOpcode = MBBI->getOpcode();
1931   if (RetOpcode == PPC::TCRETURNdi) {
1932     MBBI = MBB.getLastNonDebugInstr();
1933     MachineOperand &JumpTarget = MBBI->getOperand(0);
1934     if (JumpTarget.isGlobal())
1935       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1936         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1937     else if (JumpTarget.isSymbol())
1938       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1939         addExternalSymbol(JumpTarget.getSymbolName());
1940     else
1941       llvm_unreachable("Expecting Global or External Symbol");
1942   } else if (RetOpcode == PPC::TCRETURNri) {
1943     MBBI = MBB.getLastNonDebugInstr();
1944     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1945     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1946   } else if (RetOpcode == PPC::TCRETURNai) {
1947     MBBI = MBB.getLastNonDebugInstr();
1948     MachineOperand &JumpTarget = MBBI->getOperand(0);
1949     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1950   } else if (RetOpcode == PPC::TCRETURNdi8) {
1951     MBBI = MBB.getLastNonDebugInstr();
1952     MachineOperand &JumpTarget = MBBI->getOperand(0);
1953     if (JumpTarget.isGlobal())
1954       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1955         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1956     else if (JumpTarget.isSymbol())
1957       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1958         addExternalSymbol(JumpTarget.getSymbolName());
1959     else
1960       llvm_unreachable("Expecting Global or External Symbol");
1961   } else if (RetOpcode == PPC::TCRETURNri8) {
1962     MBBI = MBB.getLastNonDebugInstr();
1963     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1964     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1965   } else if (RetOpcode == PPC::TCRETURNai8) {
1966     MBBI = MBB.getLastNonDebugInstr();
1967     MachineOperand &JumpTarget = MBBI->getOperand(0);
1968     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1969   }
1970 }
1971 
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const1972 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1973                                             BitVector &SavedRegs,
1974                                             RegScavenger *RS) const {
1975   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1976 
1977   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1978 
1979   // Do not explicitly save the callee saved VSRp registers.
1980   // The individual VSR subregisters will be saved instead.
1981   SavedRegs.reset(PPC::VSRp26);
1982   SavedRegs.reset(PPC::VSRp27);
1983   SavedRegs.reset(PPC::VSRp28);
1984   SavedRegs.reset(PPC::VSRp29);
1985   SavedRegs.reset(PPC::VSRp30);
1986   SavedRegs.reset(PPC::VSRp31);
1987 
1988   //  Save and clear the LR state.
1989   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1990   unsigned LR = RegInfo->getRARegister();
1991   FI->setMustSaveLR(MustSaveLR(MF, LR));
1992   SavedRegs.reset(LR);
1993 
1994   //  Save R31 if necessary
1995   int FPSI = FI->getFramePointerSaveIndex();
1996   const bool isPPC64 = Subtarget.isPPC64();
1997   MachineFrameInfo &MFI = MF.getFrameInfo();
1998 
1999   // If the frame pointer save index hasn't been defined yet.
2000   if (!FPSI && needsFP(MF)) {
2001     // Find out what the fix offset of the frame pointer save area.
2002     int FPOffset = getFramePointerSaveOffset();
2003     // Allocate the frame index for frame pointer save area.
2004     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2005     // Save the result.
2006     FI->setFramePointerSaveIndex(FPSI);
2007   }
2008 
2009   int BPSI = FI->getBasePointerSaveIndex();
2010   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2011     int BPOffset = getBasePointerSaveOffset();
2012     // Allocate the frame index for the base pointer save area.
2013     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2014     // Save the result.
2015     FI->setBasePointerSaveIndex(BPSI);
2016   }
2017 
2018   // Reserve stack space for the PIC Base register (R30).
2019   // Only used in SVR4 32-bit.
2020   if (FI->usesPICBase()) {
2021     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2022     FI->setPICBasePointerSaveIndex(PBPSI);
2023   }
2024 
2025   // Make sure we don't explicitly spill r31, because, for example, we have
2026   // some inline asm which explicitly clobbers it, when we otherwise have a
2027   // frame pointer and are using r31's spill slot for the prologue/epilogue
2028   // code. Same goes for the base pointer and the PIC base register.
2029   if (needsFP(MF))
2030     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2031   if (RegInfo->hasBasePointer(MF))
2032     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2033   if (FI->usesPICBase())
2034     SavedRegs.reset(PPC::R30);
2035 
2036   // Reserve stack space to move the linkage area to in case of a tail call.
2037   int TCSPDelta = 0;
2038   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2039       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2040     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2041   }
2042 
2043   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2044   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2045   // object at the offset of the CR-save slot in the linkage area. The actual
2046   // save and restore of the condition register will be created as part of the
2047   // prologue and epilogue insertion, but the FixedStack object is needed to
2048   // keep the CalleSavedInfo valid.
2049   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2050        SavedRegs.test(PPC::CR4))) {
2051     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2052     const int64_t SpillOffset =
2053         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2054     int FrameIdx =
2055         MFI.CreateFixedObject(SpillSize, SpillOffset,
2056                               /* IsImmutable */ true, /* IsAliased */ false);
2057     FI->setCRSpillFrameIndex(FrameIdx);
2058   }
2059 }
2060 
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const2061 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2062                                                        RegScavenger *RS) const {
2063   // Get callee saved register information.
2064   MachineFrameInfo &MFI = MF.getFrameInfo();
2065   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2066 
2067   // If the function is shrink-wrapped, and if the function has a tail call, the
2068   // tail call might not be in the new RestoreBlock, so real branch instruction
2069   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2070   // RestoreBlock. So we handle this case here.
2071   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2072     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2073     for (MachineBasicBlock &MBB : MF) {
2074       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2075         createTailCallBranchInstr(MBB);
2076     }
2077   }
2078 
2079   // Early exit if no callee saved registers are modified!
2080   if (CSI.empty() && !needsFP(MF)) {
2081     addScavengingSpillSlot(MF, RS);
2082     return;
2083   }
2084 
2085   unsigned MinGPR = PPC::R31;
2086   unsigned MinG8R = PPC::X31;
2087   unsigned MinFPR = PPC::F31;
2088   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2089 
2090   bool HasGPSaveArea = false;
2091   bool HasG8SaveArea = false;
2092   bool HasFPSaveArea = false;
2093   bool HasVRSaveArea = false;
2094 
2095   SmallVector<CalleeSavedInfo, 18> GPRegs;
2096   SmallVector<CalleeSavedInfo, 18> G8Regs;
2097   SmallVector<CalleeSavedInfo, 18> FPRegs;
2098   SmallVector<CalleeSavedInfo, 18> VRegs;
2099 
2100   for (const CalleeSavedInfo &I : CSI) {
2101     Register Reg = I.getReg();
2102     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2103             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2104            "Not expecting to try to spill R2 in a function that must save TOC");
2105     if (PPC::GPRCRegClass.contains(Reg)) {
2106       HasGPSaveArea = true;
2107 
2108       GPRegs.push_back(I);
2109 
2110       if (Reg < MinGPR) {
2111         MinGPR = Reg;
2112       }
2113     } else if (PPC::G8RCRegClass.contains(Reg)) {
2114       HasG8SaveArea = true;
2115 
2116       G8Regs.push_back(I);
2117 
2118       if (Reg < MinG8R) {
2119         MinG8R = Reg;
2120       }
2121     } else if (PPC::F8RCRegClass.contains(Reg)) {
2122       HasFPSaveArea = true;
2123 
2124       FPRegs.push_back(I);
2125 
2126       if (Reg < MinFPR) {
2127         MinFPR = Reg;
2128       }
2129     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2130                PPC::CRRCRegClass.contains(Reg)) {
2131       ; // do nothing, as we already know whether CRs are spilled
2132     } else if (PPC::VRRCRegClass.contains(Reg) ||
2133                PPC::SPERCRegClass.contains(Reg)) {
2134       // Altivec and SPE are mutually exclusive, but have the same stack
2135       // alignment requirements, so overload the save area for both cases.
2136       HasVRSaveArea = true;
2137 
2138       VRegs.push_back(I);
2139 
2140       if (Reg < MinVR) {
2141         MinVR = Reg;
2142       }
2143     } else {
2144       llvm_unreachable("Unknown RegisterClass!");
2145     }
2146   }
2147 
2148   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2149   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2150 
2151   int64_t LowerBound = 0;
2152 
2153   // Take into account stack space reserved for tail calls.
2154   int TCSPDelta = 0;
2155   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2156       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2157     LowerBound = TCSPDelta;
2158   }
2159 
2160   // The Floating-point register save area is right below the back chain word
2161   // of the previous stack frame.
2162   if (HasFPSaveArea) {
2163     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2164       int FI = FPRegs[i].getFrameIdx();
2165 
2166       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2167     }
2168 
2169     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2170   }
2171 
2172   // Check whether the frame pointer register is allocated. If so, make sure it
2173   // is spilled to the correct offset.
2174   if (needsFP(MF)) {
2175     int FI = PFI->getFramePointerSaveIndex();
2176     assert(FI && "No Frame Pointer Save Slot!");
2177     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2178     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2179     HasGPSaveArea = true;
2180   }
2181 
2182   if (PFI->usesPICBase()) {
2183     int FI = PFI->getPICBasePointerSaveIndex();
2184     assert(FI && "No PIC Base Pointer Save Slot!");
2185     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2186 
2187     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2188     HasGPSaveArea = true;
2189   }
2190 
2191   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2192   if (RegInfo->hasBasePointer(MF)) {
2193     int FI = PFI->getBasePointerSaveIndex();
2194     assert(FI && "No Base Pointer Save Slot!");
2195     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2196 
2197     Register BP = RegInfo->getBaseRegister(MF);
2198     if (PPC::G8RCRegClass.contains(BP)) {
2199       MinG8R = std::min<unsigned>(MinG8R, BP);
2200       HasG8SaveArea = true;
2201     } else if (PPC::GPRCRegClass.contains(BP)) {
2202       MinGPR = std::min<unsigned>(MinGPR, BP);
2203       HasGPSaveArea = true;
2204     }
2205   }
2206 
2207   // General register save area starts right below the Floating-point
2208   // register save area.
2209   if (HasGPSaveArea || HasG8SaveArea) {
2210     // Move general register save area spill slots down, taking into account
2211     // the size of the Floating-point register save area.
2212     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2213       if (!GPRegs[i].isSpilledToReg()) {
2214         int FI = GPRegs[i].getFrameIdx();
2215         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2216       }
2217     }
2218 
2219     // Move general register save area spill slots down, taking into account
2220     // the size of the Floating-point register save area.
2221     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2222       if (!G8Regs[i].isSpilledToReg()) {
2223         int FI = G8Regs[i].getFrameIdx();
2224         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2225       }
2226     }
2227 
2228     unsigned MinReg =
2229       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2230                          TRI->getEncodingValue(MinG8R));
2231 
2232     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2233     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2234   }
2235 
2236   // For 32-bit only, the CR save area is below the general register
2237   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2238   // to the stack pointer and hence does not need an adjustment here.
2239   // Only CR2 (the first nonvolatile spilled) has an associated frame
2240   // index so that we have a single uniform save area.
2241   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2242     // Adjust the frame index of the CR spill slot.
2243     for (const auto &CSInfo : CSI) {
2244       if (CSInfo.getReg() == PPC::CR2) {
2245         int FI = CSInfo.getFrameIdx();
2246         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2247         break;
2248       }
2249     }
2250 
2251     LowerBound -= 4; // The CR save area is always 4 bytes long.
2252   }
2253 
2254   // Both Altivec and SPE have the same alignment and padding requirements
2255   // within the stack frame.
2256   if (HasVRSaveArea) {
2257     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2258     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2259     // we are using negative number here (the stack grows downward). We should
2260     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2261     // is the alignment size ( n = 16 here) and y is the size after aligning.
2262     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2263     LowerBound &= ~(15);
2264 
2265     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2266       int FI = VRegs[i].getFrameIdx();
2267 
2268       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2269     }
2270   }
2271 
2272   addScavengingSpillSlot(MF, RS);
2273 }
2274 
2275 void
addScavengingSpillSlot(MachineFunction & MF,RegScavenger * RS) const2276 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2277                                          RegScavenger *RS) const {
2278   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2279   // a large stack, which will require scavenging a register to materialize a
2280   // large offset.
2281 
2282   // We need to have a scavenger spill slot for spills if the frame size is
2283   // large. In case there is no free register for large-offset addressing,
2284   // this slot is used for the necessary emergency spill. Also, we need the
2285   // slot for dynamic stack allocations.
2286 
2287   // The scavenger might be invoked if the frame offset does not fit into
2288   // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2289   // We don't know the complete frame size here because we've not yet computed
2290   // callee-saved register spills or the needed alignment padding.
2291   unsigned StackSize = determineFrameLayout(MF, true);
2292   MachineFrameInfo &MFI = MF.getFrameInfo();
2293   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2294 
2295   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2296       (hasSpills(MF) && NeedSpills)) {
2297     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2298     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2299     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2300     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2301     unsigned Size = TRI.getSpillSize(RC);
2302     Align Alignment = TRI.getSpillAlign(RC);
2303     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2304 
2305     // Might we have over-aligned allocas?
2306     bool HasAlVars =
2307         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2308 
2309     // These kinds of spills might need two registers.
2310     if (spillsCR(MF) || HasAlVars)
2311       RS->addScavengingFrameIndex(
2312           MFI.CreateStackObject(Size, Alignment, false));
2313   }
2314 }
2315 
2316 // This function checks if a callee saved gpr can be spilled to a volatile
2317 // vector register. This occurs for leaf functions when the option
2318 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2319 // which were not spilled to vectors, return false so the target independent
2320 // code can handle them by assigning a FrameIdx to a stack slot.
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const2321 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2322     MachineFunction &MF, const TargetRegisterInfo *TRI,
2323     std::vector<CalleeSavedInfo> &CSI) const {
2324 
2325   if (CSI.empty())
2326     return true; // Early exit if no callee saved registers are modified!
2327 
2328   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2329   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2330   const MachineRegisterInfo &MRI = MF.getRegInfo();
2331 
2332   if (Subtarget.hasSPE()) {
2333     // In case of SPE we only have SuperRegs and CRs
2334     // in our CalleSaveInfo vector.
2335 
2336     for (auto &CalleeSaveReg : CSI) {
2337       MCPhysReg Reg = CalleeSaveReg.getReg();
2338       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2339       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2340 
2341       if ( // Check only for SuperRegs.
2342           Lower &&
2343           // Replace Reg if only lower-32 bits modified
2344           !MRI.isPhysRegModified(Higher))
2345         CalleeSaveReg = CalleeSavedInfo(Lower);
2346     }
2347   }
2348 
2349   // Early exit if cannot spill gprs to volatile vector registers.
2350   MachineFrameInfo &MFI = MF.getFrameInfo();
2351   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2352     return false;
2353 
2354   // Build a BitVector of VSRs that can be used for spilling GPRs.
2355   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2356   BitVector BVCalleeSaved(TRI->getNumRegs());
2357   for (unsigned i = 0; CSRegs[i]; ++i)
2358     BVCalleeSaved.set(CSRegs[i]);
2359 
2360   for (unsigned Reg : BVAllocatable.set_bits()) {
2361     // Set to 0 if the register is not a volatile VSX register, or if it is
2362     // used in the function.
2363     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2364         MRI.isPhysRegUsed(Reg))
2365       BVAllocatable.reset(Reg);
2366   }
2367 
2368   bool AllSpilledToReg = true;
2369   unsigned LastVSRUsedForSpill = 0;
2370   for (auto &CS : CSI) {
2371     if (BVAllocatable.none())
2372       return false;
2373 
2374     Register Reg = CS.getReg();
2375 
2376     if (!PPC::G8RCRegClass.contains(Reg)) {
2377       AllSpilledToReg = false;
2378       continue;
2379     }
2380 
2381     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2382     // into one VSR using the mtvsrdd instruction.
2383     if (LastVSRUsedForSpill != 0) {
2384       CS.setDstReg(LastVSRUsedForSpill);
2385       BVAllocatable.reset(LastVSRUsedForSpill);
2386       LastVSRUsedForSpill = 0;
2387       continue;
2388     }
2389 
2390     unsigned VolatileVFReg = BVAllocatable.find_first();
2391     if (VolatileVFReg < BVAllocatable.size()) {
2392       CS.setDstReg(VolatileVFReg);
2393       LastVSRUsedForSpill = VolatileVFReg;
2394     } else {
2395       AllSpilledToReg = false;
2396     }
2397   }
2398   return AllSpilledToReg;
2399 }
2400 
spillCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,ArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI) const2401 bool PPCFrameLowering::spillCalleeSavedRegisters(
2402     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2403     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2404 
2405   MachineFunction *MF = MBB.getParent();
2406   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2407   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2408   bool MustSaveTOC = FI->mustSaveTOC();
2409   DebugLoc DL;
2410   bool CRSpilled = false;
2411   MachineInstrBuilder CRMIB;
2412   BitVector Spilled(TRI->getNumRegs());
2413 
2414   VSRContainingGPRs.clear();
2415 
2416   // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
2417   // or two GPRs, so we need table to record information for later save/restore.
2418   for (const CalleeSavedInfo &Info : CSI) {
2419     if (Info.isSpilledToReg()) {
2420       auto &SpilledVSR =
2421           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2422       assert(SpilledVSR.second == 0 &&
2423              "Can't spill more than two GPRs into VSR!");
2424       if (SpilledVSR.first == 0)
2425         SpilledVSR.first = Info.getReg();
2426       else
2427         SpilledVSR.second = Info.getReg();
2428     }
2429   }
2430 
2431   for (const CalleeSavedInfo &I : CSI) {
2432     Register Reg = I.getReg();
2433 
2434     // CR2 through CR4 are the nonvolatile CR fields.
2435     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2436 
2437     // Add the callee-saved register as live-in; it's killed at the spill.
2438     // Do not do this for callee-saved registers that are live-in to the
2439     // function because they will already be marked live-in and this will be
2440     // adding it for a second time. It is an error to add the same register
2441     // to the set more than once.
2442     const MachineRegisterInfo &MRI = MF->getRegInfo();
2443     bool IsLiveIn = MRI.isLiveIn(Reg);
2444     if (!IsLiveIn)
2445        MBB.addLiveIn(Reg);
2446 
2447     if (CRSpilled && IsCRField) {
2448       CRMIB.addReg(Reg, RegState::ImplicitKill);
2449       continue;
2450     }
2451 
2452     // The actual spill will happen in the prologue.
2453     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2454       continue;
2455 
2456     // Insert the spill to the stack frame.
2457     if (IsCRField) {
2458       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2459       if (!Subtarget.is32BitELFABI()) {
2460         // The actual spill will happen at the start of the prologue.
2461         FuncInfo->addMustSaveCR(Reg);
2462       } else {
2463         CRSpilled = true;
2464         FuncInfo->setSpillsCR();
2465 
2466         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2467         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2468         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2469                   .addReg(Reg, RegState::ImplicitKill);
2470 
2471         MBB.insert(MI, CRMIB);
2472         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2473                                          .addReg(PPC::R12,
2474                                                  getKillRegState(true)),
2475                                          I.getFrameIdx()));
2476       }
2477     } else {
2478       if (I.isSpilledToReg()) {
2479         unsigned Dst = I.getDstReg();
2480 
2481         if (Spilled[Dst])
2482           continue;
2483 
2484         if (VSRContainingGPRs[Dst].second != 0) {
2485           assert(Subtarget.hasP9Vector() &&
2486                  "mtvsrdd is unavailable on pre-P9 targets.");
2487 
2488           NumPESpillVSR += 2;
2489           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2490               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2491               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2492         } else if (VSRContainingGPRs[Dst].second == 0) {
2493           assert(Subtarget.hasP8Vector() &&
2494                  "Can't move GPR to VSR on pre-P8 targets.");
2495 
2496           ++NumPESpillVSR;
2497           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2498                   TRI->getSubReg(Dst, PPC::sub_64))
2499               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2500         } else {
2501           llvm_unreachable("More than two GPRs spilled to a VSR!");
2502         }
2503         Spilled.set(Dst);
2504       } else {
2505         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2506         // Use !IsLiveIn for the kill flag.
2507         // We do not want to kill registers that are live in this function
2508         // before their use because they will become undefined registers.
2509         // Functions without NoUnwind need to preserve the order of elements in
2510         // saved vector registers.
2511         if (Subtarget.needsSwapsForVSXMemOps() &&
2512             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2513           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2514                                        I.getFrameIdx(), RC, TRI);
2515         else
2516           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2517                                   TRI, Register());
2518       }
2519     }
2520   }
2521   return true;
2522 }
2523 
restoreCRs(bool is31,bool CR2Spilled,bool CR3Spilled,bool CR4Spilled,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,ArrayRef<CalleeSavedInfo> CSI,unsigned CSIIndex)2524 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2525                        bool CR4Spilled, MachineBasicBlock &MBB,
2526                        MachineBasicBlock::iterator MI,
2527                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2528 
2529   MachineFunction *MF = MBB.getParent();
2530   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2531   DebugLoc DL;
2532   unsigned MoveReg = PPC::R12;
2533 
2534   // 32-bit:  FP-relative
2535   MBB.insert(MI,
2536              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2537                                CSI[CSIIndex].getFrameIdx()));
2538 
2539   unsigned RestoreOp = PPC::MTOCRF;
2540   if (CR2Spilled)
2541     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2542                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2543 
2544   if (CR3Spilled)
2545     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2546                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2547 
2548   if (CR4Spilled)
2549     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2550                .addReg(MoveReg, getKillRegState(true)));
2551 }
2552 
2553 MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const2554 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2555                               MachineBasicBlock::iterator I) const {
2556   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2557   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2558       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2559     // Add (actually subtract) back the amount the callee popped on return.
2560     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2561       bool is64Bit = Subtarget.isPPC64();
2562       CalleeAmt *= -1;
2563       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2564       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2565       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2566       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2567       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2568       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2569       const DebugLoc &dl = I->getDebugLoc();
2570 
2571       if (isInt<16>(CalleeAmt)) {
2572         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2573           .addReg(StackReg, RegState::Kill)
2574           .addImm(CalleeAmt);
2575       } else {
2576         MachineBasicBlock::iterator MBBI = I;
2577         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2578           .addImm(CalleeAmt >> 16);
2579         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2580           .addReg(TmpReg, RegState::Kill)
2581           .addImm(CalleeAmt & 0xFFFF);
2582         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2583           .addReg(StackReg, RegState::Kill)
2584           .addReg(TmpReg);
2585       }
2586     }
2587   }
2588   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2589   return MBB.erase(I);
2590 }
2591 
isCalleeSavedCR(unsigned Reg)2592 static bool isCalleeSavedCR(unsigned Reg) {
2593   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2594 }
2595 
restoreCalleeSavedRegisters(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,MutableArrayRef<CalleeSavedInfo> CSI,const TargetRegisterInfo * TRI) const2596 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2597     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2598     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2599   MachineFunction *MF = MBB.getParent();
2600   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2601   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2602   bool MustSaveTOC = FI->mustSaveTOC();
2603   bool CR2Spilled = false;
2604   bool CR3Spilled = false;
2605   bool CR4Spilled = false;
2606   unsigned CSIIndex = 0;
2607   BitVector Restored(TRI->getNumRegs());
2608 
2609   // Initialize insertion-point logic; we will be restoring in reverse
2610   // order of spill.
2611   MachineBasicBlock::iterator I = MI, BeforeI = I;
2612   bool AtStart = I == MBB.begin();
2613 
2614   if (!AtStart)
2615     --BeforeI;
2616 
2617   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2618     Register Reg = CSI[i].getReg();
2619 
2620     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2621       continue;
2622 
2623     // Restore of callee saved condition register field is handled during
2624     // epilogue insertion.
2625     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2626       continue;
2627 
2628     if (Reg == PPC::CR2) {
2629       CR2Spilled = true;
2630       // The spill slot is associated only with CR2, which is the
2631       // first nonvolatile spilled.  Save it here.
2632       CSIIndex = i;
2633       continue;
2634     } else if (Reg == PPC::CR3) {
2635       CR3Spilled = true;
2636       continue;
2637     } else if (Reg == PPC::CR4) {
2638       CR4Spilled = true;
2639       continue;
2640     } else {
2641       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2642       // least one CR register, restore all spilled CRs together.
2643       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2644         bool is31 = needsFP(*MF);
2645         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2646                    CSIIndex);
2647         CR2Spilled = CR3Spilled = CR4Spilled = false;
2648       }
2649 
2650       if (CSI[i].isSpilledToReg()) {
2651         DebugLoc DL;
2652         unsigned Dst = CSI[i].getDstReg();
2653 
2654         if (Restored[Dst])
2655           continue;
2656 
2657         if (VSRContainingGPRs[Dst].second != 0) {
2658           assert(Subtarget.hasP9Vector());
2659           NumPEReloadVSR += 2;
2660           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2661                   VSRContainingGPRs[Dst].second)
2662               .addReg(Dst);
2663           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2664                   VSRContainingGPRs[Dst].first)
2665               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2666         } else if (VSRContainingGPRs[Dst].second == 0) {
2667           assert(Subtarget.hasP8Vector());
2668           ++NumPEReloadVSR;
2669           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2670                   VSRContainingGPRs[Dst].first)
2671               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2672         } else {
2673           llvm_unreachable("More than two GPRs spilled to a VSR!");
2674         }
2675 
2676         Restored.set(Dst);
2677 
2678       } else {
2679        // Default behavior for non-CR saves.
2680         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2681 
2682         // Functions without NoUnwind need to preserve the order of elements in
2683         // saved vector registers.
2684         if (Subtarget.needsSwapsForVSXMemOps() &&
2685             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2686           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2687                                         TRI);
2688         else
2689           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2690                                    Register());
2691 
2692         assert(I != MBB.begin() &&
2693                "loadRegFromStackSlot didn't insert any code!");
2694       }
2695     }
2696 
2697     // Insert in reverse order.
2698     if (AtStart)
2699       I = MBB.begin();
2700     else {
2701       I = BeforeI;
2702       ++I;
2703     }
2704   }
2705 
2706   // If we haven't yet spilled the CRs, do so now.
2707   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2708     assert(Subtarget.is32BitELFABI() &&
2709            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2710     bool is31 = needsFP(*MF);
2711     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2712   }
2713 
2714   return true;
2715 }
2716 
getTOCSaveOffset() const2717 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2718   return TOCSaveOffset;
2719 }
2720 
getFramePointerSaveOffset() const2721 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2722   return FramePointerSaveOffset;
2723 }
2724 
getBasePointerSaveOffset() const2725 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2726   return BasePointerSaveOffset;
2727 }
2728 
enableShrinkWrapping(const MachineFunction & MF) const2729 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2730   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2731     return false;
2732   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2733 }
2734 
getStackThreshold() const2735 uint64_t PPCFrameLowering::getStackThreshold() const {
2736   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2737   // use `add r1, r1, <scratch_reg>` to release the stack frame.
2738   // Scratch register contains a signed 64-bit number, which is negative
2739   // when extending the stack and is positive when releasing the stack frame.
2740   // To make `stux` and `add` paired, the absolute value of the number contained
2741   // in the scratch register should be the same. Thus the maximum stack size
2742   // is (2^63)-1, i.e., LONG_MAX.
2743   if (Subtarget.isPPC64())
2744     return LONG_MAX;
2745 
2746   return TargetFrameLowering::getStackThreshold();
2747 }
2748