1 //===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "SystemZFrameLowering.h"
10 #include "SystemZCallingConv.h"
11 #include "SystemZInstrBuilder.h"
12 #include "SystemZInstrInfo.h"
13 #include "SystemZMachineFunctionInfo.h"
14 #include "SystemZRegisterInfo.h"
15 #include "SystemZSubtarget.h"
16 #include "llvm/CodeGen/LivePhysRegs.h"
17 #include "llvm/CodeGen/MachineModuleInfo.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/CodeGen/RegisterScavenging.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/Target/TargetMachine.h"
22
23 using namespace llvm;
24
namespace {
// The ABI-defined register save slots, relative to the CFA (i.e.
// incoming stack pointer + SystemZMC::ELFCallFrameSize).
// Note: r2-r5 appear here even though they are call-clobbered argument
// registers; their slots are used when saving GPR varargs.
static const TargetFrameLowering::SpillSlot ELFSpillOffsetTable[] = {
  { SystemZ::R2D, 0x10 },
  { SystemZ::R3D, 0x18 },
  { SystemZ::R4D, 0x20 },
  { SystemZ::R5D, 0x28 },
  { SystemZ::R6D, 0x30 },
  { SystemZ::R7D, 0x38 },
  { SystemZ::R8D, 0x40 },
  { SystemZ::R9D, 0x48 },
  { SystemZ::R10D, 0x50 },
  { SystemZ::R11D, 0x58 },
  { SystemZ::R12D, 0x60 },
  { SystemZ::R13D, 0x68 },
  { SystemZ::R14D, 0x70 },
  { SystemZ::R15D, 0x78 },
  { SystemZ::F0D, 0x80 },
  { SystemZ::F2D, 0x88 },
  { SystemZ::F4D, 0x90 },
  { SystemZ::F6D, 0x98 }
};

// Save-slot offsets for the 64-bit XPLINK ABI; presumably relative to the
// start of the XPLINK register save area — TODO confirm against the XPLINK
// frame-lowering implementation.
static const TargetFrameLowering::SpillSlot XPLINKSpillOffsetTable[] = {
    {SystemZ::R4D, 0x00}, {SystemZ::R5D, 0x08}, {SystemZ::R6D, 0x10},
    {SystemZ::R7D, 0x18}, {SystemZ::R8D, 0x20}, {SystemZ::R9D, 0x28},
    {SystemZ::R10D, 0x30}, {SystemZ::R11D, 0x38}, {SystemZ::R12D, 0x40},
    {SystemZ::R13D, 0x48}, {SystemZ::R14D, 0x50}, {SystemZ::R15D, 0x58}};
} // end anonymous namespace
55
SystemZFrameLowering(StackDirection D,Align StackAl,int LAO,Align TransAl,bool StackReal)56 SystemZFrameLowering::SystemZFrameLowering(StackDirection D, Align StackAl,
57 int LAO, Align TransAl,
58 bool StackReal)
59 : TargetFrameLowering(D, StackAl, LAO, TransAl, StackReal) {}
60
61 std::unique_ptr<SystemZFrameLowering>
create(const SystemZSubtarget & STI)62 SystemZFrameLowering::create(const SystemZSubtarget &STI) {
63 if (STI.isTargetXPLINK64())
64 return std::make_unique<SystemZXPLINKFrameLowering>();
65 return std::make_unique<SystemZELFFrameLowering>();
66 }
67
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI) const68 MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
69 MachineFunction &MF, MachineBasicBlock &MBB,
70 MachineBasicBlock::iterator MI) const {
71 switch (MI->getOpcode()) {
72 case SystemZ::ADJCALLSTACKDOWN:
73 case SystemZ::ADJCALLSTACKUP:
74 assert(hasReservedCallFrame(MF) &&
75 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
76 return MBB.erase(MI);
77 break;
78
79 default:
80 llvm_unreachable("Unexpected call frame instruction");
81 }
82 }
83
namespace {
// Per-frame-object bookkeeping used by orderFrameObjects() below.
struct SZFrameSortingObj {
  bool IsValid = false;     // True if we care about this Object.
  uint32_t ObjectIndex = 0; // Index of Object into MFI list.
  uint64_t ObjectSize = 0;  // Size of Object in bytes.
  uint32_t D12Count = 0;    // 12-bit displacement only.
  uint32_t DPairCount = 0;  // 12 or 20 bit displacement.
};
using SZFrameObjVec = std::vector<SZFrameSortingObj>;
} // namespace
94
// TODO: Move to base class.
// Reorder ObjectsToAllocate so that frame objects accessed most densely via
// short (12-bit) displacements end up closest to the stack pointer,
// maximizing the number of uses encodable without a long-displacement or
// extra addressing instruction.
void SystemZELFFrameLowering::orderFrameObjects(
    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo();

  // Make a vector of sorting objects to track all MFI objects and mark those
  // to be sorted as valid.
  if (ObjectsToAllocate.size() <= 1)
    return;
  SZFrameObjVec SortingObjects(MFI.getObjectIndexEnd());
  for (auto &Obj : ObjectsToAllocate) {
    SortingObjects[Obj].IsValid = true;
    SortingObjects[Obj].ObjectIndex = Obj;
    SortingObjects[Obj].ObjectSize = MFI.getObjectSize(Obj);
  }

  // Examine uses for each object and record short (12-bit) and "pair"
  // displacement types.
  for (auto &MBB : MF)
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
        const MachineOperand &MO = MI.getOperand(I);
        if (!MO.isFI())
          continue;
        int Index = MO.getIndex();
        // Negative indices are fixed objects; only count objects we are
        // actually sorting.
        if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
            SortingObjects[Index].IsValid) {
          if (TII->hasDisplacementPairInsn(MI.getOpcode()))
            SortingObjects[Index].DPairCount++;
          else if (!(MI.getDesc().TSFlags & SystemZII::Has20BitOffset))
            SortingObjects[Index].D12Count++;
        }
      }
    }

  // Sort all objects for short/paired displacements, which should be
  // sufficient as it seems like all frame objects typically are within the
  // long displacement range. Sorting works by computing the "density" as
  // Count / ObjectSize. The comparisons of two such fractions are refactored
  // by multiplying both sides with A.ObjectSize * B.ObjectSize, in order to
  // eliminate the (fp) divisions. A higher density object needs to go after
  // in the list in order for it to end up lower on the stack.
  auto CmpD12 = [](const SZFrameSortingObj &A, const SZFrameSortingObj &B) {
    // Put all invalid and variable sized objects at the end.
    if (!A.IsValid || !B.IsValid)
      return A.IsValid;
    if (!A.ObjectSize || !B.ObjectSize)
      return A.ObjectSize > 0;
    uint64_t ADensityCmp = A.D12Count * B.ObjectSize;
    uint64_t BDensityCmp = B.D12Count * A.ObjectSize;
    if (ADensityCmp != BDensityCmp)
      return ADensityCmp < BDensityCmp;
    // Tie-break on the density of "pair" (12- or 20-bit) displacement uses.
    return A.DPairCount * B.ObjectSize < B.DPairCount * A.ObjectSize;
  };
  std::stable_sort(SortingObjects.begin(), SortingObjects.end(), CmpD12);

  // Now modify the original list to represent the final order that
  // we want.
  unsigned Idx = 0;
  for (auto &Obj : SortingObjects) {
    // All invalid items are sorted at the end, so it's safe to stop.
    if (!Obj.IsValid)
      break;
    ObjectsToAllocate[Idx++] = Obj.ObjectIndex;
  }
}
164
hasReservedCallFrame(const MachineFunction & MF) const165 bool SystemZFrameLowering::hasReservedCallFrame(
166 const MachineFunction &MF) const {
167 // The ELF ABI requires us to allocate 160 bytes of stack space for the
168 // callee, with any outgoing stack arguments being placed above that. It
169 // seems better to make that area a permanent feature of the frame even if
170 // we're using a frame pointer. Similarly, 64-bit XPLINK requires 96 bytes
171 // of stack space for the register save area.
172 return true;
173 }
174
// Decide where every callee-saved register is spilled.  GPRs (and any other
// register with a non-zero getRegSpillOffset) get fixed objects at their
// ABI-defined slots in the caller's frame; everything else gets fixed
// objects allocated downwards below the incoming SP.  Also records the
// low/high GPR range and start offset used later to build STMG/LMG.
// Returns true to tell PEI that spill slots were assigned here.
bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  MachineFrameInfo &MFFrame = MF.getFrameInfo();
  bool IsVarArg = MF.getFunction().isVarArg();
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  unsigned LowGPR = 0;
  unsigned HighGPR = SystemZ::R15D;
  int StartSPOffset = SystemZMC::ELFCallFrameSize;
  for (auto &CS : CSI) {
    Register Reg = CS.getReg();
    int Offset = getRegSpillOffset(MF, Reg);
    if (Offset) {
      // Track the lowest-offset GPR seen so far; it becomes the first
      // register of the STMG/LMG range.
      if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
        LowGPR = Reg;
        StartSPOffset = Offset;
      }
      // Convert the CFA-relative ABI offset to an SP-relative fixed-object
      // offset.
      Offset -= SystemZMC::ELFCallFrameSize;
      int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
      CS.setFrameIdx(FrameIdx);
    } else
      // No ABI slot; marked for the second pass below.
      CS.setFrameIdx(INT32_MAX);
  }

  // Save the range of call-saved registers, for use by the
  // prologue/epilogue inserters.
  ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
  if (IsVarArg) {
    // Also save the GPR varargs, if any. R6D is call-saved, so would
    // already be included, but we also need to handle the call-clobbered
    // argument registers.
    Register FirstGPR = ZFI->getVarArgsFirstGPR();
    if (FirstGPR < SystemZ::ELFNumArgGPRs) {
      unsigned Reg = SystemZ::ELFArgGPRs[FirstGPR];
      int Offset = getRegSpillOffset(MF, Reg);
      if (StartSPOffset > Offset) {
        LowGPR = Reg; StartSPOffset = Offset;
      }
    }
  }
  // Note: the spill range may start lower than the restore range, because
  // varargs registers are saved but never restored.
  ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);

  // Create fixed stack objects for the remaining registers.
  int CurrOffset = -SystemZMC::ELFCallFrameSize;
  if (usePackedStack(MF))
    CurrOffset += StartSPOffset;

  for (auto &CS : CSI) {
    if (CS.getFrameIdx() != INT32_MAX)
      continue;
    Register Reg = CS.getReg();
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    unsigned Size = TRI->getSpillSize(*RC);
    // Allocate downwards from the current offset.
    CurrOffset -= Size;
    assert(CurrOffset % 8 == 0 &&
           "8-byte alignment required for for all register save slots");
    int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset);
    CS.setFrameIdx(FrameIdx);
  }

  return true;
}
240
// Add SystemZ-specific registers to the callee-saved set computed by the
// target-independent pass: pending GPR varargs saves, the EH registers
// r6/r7, the frame pointer r11, the return address r14, and %r15 whenever
// any other GPR is saved (so STMG/LMG can handle SP save/restore too).
void SystemZELFFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                   BitVector &SavedRegs,
                                                   RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  MachineFrameInfo &MFFrame = MF.getFrameInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  bool HasFP = hasFP(MF);
  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
  bool IsVarArg = MF.getFunction().isVarArg();

  // va_start stores incoming FPR varargs in the normal way, but delegates
  // the saving of incoming GPR varargs to spillCalleeSavedRegisters().
  // Record these pending uses, which typically include the call-saved
  // argument register R6D.
  if (IsVarArg)
    for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::ELFNumArgGPRs; ++I)
      SavedRegs.set(SystemZ::ELFArgGPRs[I]);

  // If there are any landing pads, entering them will modify r6/r7.
  if (!MF.getLandingPads().empty()) {
    SavedRegs.set(SystemZ::R6D);
    SavedRegs.set(SystemZ::R7D);
  }

  // If the function requires a frame pointer, record that the hard
  // frame pointer will be clobbered.
  if (HasFP)
    SavedRegs.set(SystemZ::R11D);

  // If the function calls other functions, record that the return
  // address register will be clobbered.
  if (MFFrame.hasCalls())
    SavedRegs.set(SystemZ::R14D);

  // If we are saving GPRs other than the stack pointer, we might as well
  // save and restore the stack pointer at the same time, via STMG and LMG.
  // This allows the deallocation to be done by the LMG, rather than needing
  // a separate %r15 addition.
  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
  for (unsigned I = 0; CSRegs[I]; ++I) {
    unsigned Reg = CSRegs[I];
    if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) {
      SavedRegs.set(SystemZ::R15D);
      break;
    }
  }
}
289
SystemZELFFrameLowering()290 SystemZELFFrameLowering::SystemZELFFrameLowering()
291 : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0,
292 Align(8), /* StackRealignable */ false),
293 RegSpillOffsets(0) {
294
295 // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
296 // equal to the incoming stack pointer, but to incoming stack pointer plus
297 // 160. Instead of using a Local Area Offset, the Register save area will
298 // be occupied by fixed frame objects, and all offsets are actually
299 // relative to CFA.
300
301 // Create a mapping from register number to save slot offset.
302 // These offsets are relative to the start of the register save area.
303 RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
304 for (const auto &Entry : ELFSpillOffsetTable)
305 RegSpillOffsets[Entry.Reg] = Entry.Offset;
306 }
307
308 // Add GPR64 to the save instruction being built by MIB, which is in basic
309 // block MBB. IsImplicit says whether this is an explicit operand to the
310 // instruction, or an implicit one that comes between the explicit start
311 // and end registers.
addSavedGPR(MachineBasicBlock & MBB,MachineInstrBuilder & MIB,unsigned GPR64,bool IsImplicit)312 static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
313 unsigned GPR64, bool IsImplicit) {
314 const TargetRegisterInfo *RI =
315 MBB.getParent()->getSubtarget().getRegisterInfo();
316 Register GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
317 bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
318 if (!IsLive || !IsImplicit) {
319 MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
320 if (!IsLive)
321 MBB.addLiveIn(GPR64);
322 }
323 }
324
// Emit the callee-saved register spills at MBBI: a single STMG covering the
// contiguous GPR range recorded by assignCalleeSavedSpillSlots() (plus any
// GPR varargs), and individual stores for FPRs/VRs.
// Returns true so PEI does not emit its own spill code.
bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  bool IsVarArg = MF.getFunction().isVarArg();
  DebugLoc DL;

  // Save GPRs
  SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
  if (SpillGPRs.LowGPR) {
    assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
           "Should be saving %r15 and something else");

    // Build an STMG instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));

    // Add the explicit register operands.
    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);

    // Add the address: base %r15 plus the recorded save-area offset.
    MIB.addReg(SystemZ::R15D).addImm(SpillGPRs.GPROffset);

    // Make sure all call-saved GPRs are included as operands and are
    // marked as live on entry.
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      if (SystemZ::GR64BitRegClass.contains(Reg))
        addSavedGPR(MBB, MIB, Reg, true);
    }

    // ...likewise GPR varargs.
    if (IsVarArg)
      for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::ELFNumArgGPRs; ++I)
        addSavedGPR(MBB, MIB, SystemZ::ELFArgGPRs[I], true);
  }

  // Save FPRs/VRs in the normal TargetInstrInfo way.
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    if (SystemZ::FP64BitRegClass.contains(Reg)) {
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::FP64BitRegClass, TRI, Register());
    }
    if (SystemZ::VR128BitRegClass.contains(Reg)) {
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::VR128BitRegClass, TRI, Register());
    }
  }

  return true;
}
384
// Emit the callee-saved register restores at MBBI: individual loads for
// FPRs/VRs followed by a single LMG for the recorded GPR range (based off
// the frame pointer when one is in use).
// Returns true so PEI does not emit its own restore code.
bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  // Restore FPRs/VRs in the normal TargetInstrInfo way.
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    if (SystemZ::FP64BitRegClass.contains(Reg))
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::FP64BitRegClass, TRI, Register());
    if (SystemZ::VR128BitRegClass.contains(Reg))
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::VR128BitRegClass, TRI, Register());
  }

  // Restore call-saved GPRs (but not call-clobbered varargs, which at
  // this point might hold return values).
  SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
  if (RestoreGPRs.LowGPR) {
    // If we saved any of %r2-%r5 as varargs, we should also be saving
    // and restoring %r6. If we're saving %r6 or above, we should be
    // restoring it too.
    assert(RestoreGPRs.LowGPR != RestoreGPRs.HighGPR &&
           "Should be loading %r15 and something else");

    // Build an LMG instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));

    // Add the explicit register operands.
    MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
    MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);

    // Add the address.  With a frame pointer, %r15 may not point at the
    // frame base any more, so restore relative to %r11 instead.
    MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D);
    MIB.addImm(RestoreGPRs.GPROffset);

    // Do a second scan adding regs as being defined by instruction
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR &&
          SystemZ::GR64BitRegClass.contains(Reg))
        MIB.addReg(Reg, RegState::ImplicitDefine);
    }
  }

  return true;
}
440
// Final frame adjustments before offsets are fixed: create the incoming
// register save area when needed, reserve register-scavenging slots when
// the estimated frame may exceed 12-bit displacements, and clean up bogus
// kill flags on R6 when it is an unclobbered argument register.
void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  MachineFrameInfo &MFFrame = MF.getFrameInfo();
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  bool BackChain = MF.getFunction().hasFnAttribute("backchain");

  if (!usePackedStack(MF) || BackChain)
    // Create the incoming register save area.
    getOrCreateFramePointerSaveIndex(MF);

  // Get the size of our stack frame to be allocated ...
  uint64_t StackSize = (MFFrame.estimateStackSize(MF) +
                        SystemZMC::ELFCallFrameSize);
  // ... and the maximum offset we may need to reach into the
  // caller's frame to access the save area or stack arguments.
  // (Negative frame indices are fixed objects; positive offsets lie in the
  // caller's frame.)
  int64_t MaxArgOffset = 0;
  for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I)
    if (MFFrame.getObjectOffset(I) >= 0) {
      int64_t ArgOffset = MFFrame.getObjectOffset(I) +
                          MFFrame.getObjectSize(I);
      MaxArgOffset = std::max(MaxArgOffset, ArgOffset);
    }

  uint64_t MaxReach = StackSize + MaxArgOffset;
  if (!isUInt<12>(MaxReach)) {
    // We may need register scavenging slots if some parts of the frame
    // are outside the reach of an unsigned 12-bit displacement.
    // Create 2 for the case where both addresses in an MVC are
    // out of range.
    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
    RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
  }

  // If R6 is used as an argument register it is still callee saved. If it in
  // this case is not clobbered (and restored) it should never be marked as
  // killed.
  if (MF.front().isLiveIn(SystemZ::R6D) &&
      ZFI->getRestoreGPRRegs().LowGPR != SystemZ::R6D)
    for (auto &MO : MRI->use_nodbg_operands(SystemZ::R6D))
      MO.setIsKill(false);
}
483
484 // Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
emitIncrement(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,const DebugLoc & DL,Register Reg,int64_t NumBytes,const TargetInstrInfo * TII)485 static void emitIncrement(MachineBasicBlock &MBB,
486 MachineBasicBlock::iterator &MBBI, const DebugLoc &DL,
487 Register Reg, int64_t NumBytes,
488 const TargetInstrInfo *TII) {
489 while (NumBytes) {
490 unsigned Opcode;
491 int64_t ThisVal = NumBytes;
492 if (isInt<16>(NumBytes))
493 Opcode = SystemZ::AGHI;
494 else {
495 Opcode = SystemZ::AGFI;
496 // Make sure we maintain 8-byte stack alignment.
497 int64_t MinVal = -uint64_t(1) << 31;
498 int64_t MaxVal = (int64_t(1) << 31) - 8;
499 if (ThisVal < MinVal)
500 ThisVal = MinVal;
501 else if (ThisVal > MaxVal)
502 ThisVal = MaxVal;
503 }
504 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg)
505 .addReg(Reg).addImm(ThisVal);
506 // The CC implicit def is dead.
507 MI->getOperand(3).setIsDead();
508 NumBytes -= ThisVal;
509 }
510 }
511
512 // Add CFI for the new CFA offset.
buildCFAOffs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,int Offset,const SystemZInstrInfo * ZII)513 static void buildCFAOffs(MachineBasicBlock &MBB,
514 MachineBasicBlock::iterator MBBI,
515 const DebugLoc &DL, int Offset,
516 const SystemZInstrInfo *ZII) {
517 unsigned CFIIndex = MBB.getParent()->addFrameInst(
518 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
519 BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
520 .addCFIIndex(CFIIndex);
521 }
522
523 // Add CFI for the new frame location.
buildDefCFAReg(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,unsigned Reg,const SystemZInstrInfo * ZII)524 static void buildDefCFAReg(MachineBasicBlock &MBB,
525 MachineBasicBlock::iterator MBBI,
526 const DebugLoc &DL, unsigned Reg,
527 const SystemZInstrInfo *ZII) {
528 MachineFunction &MF = *MBB.getParent();
529 MachineModuleInfo &MMI = MF.getMMI();
530 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
531 unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
532 unsigned CFIIndex = MF.addFrameInst(
533 MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
534 BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
535 .addCFIIndex(CFIIndex);
536 }
537
// Emit the function prologue into the entry block: skip over the STMG that
// spillCalleeSavedRegisters() already inserted (adding CFI for each saved
// GPR), allocate the stack frame (with inline stack probing when required),
// set up the frame pointer, and emit CFI for the FPR/VR saves.
void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
  const SystemZTargetLowering &TLI = *STI.getTargetLowering();
  MachineFrameInfo &MFFrame = MF.getFrameInfo();
  auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo();
  bool HasFP = hasFP(MF);

  // In GHC calling convention C stack space, including the ABI-defined
  // 160-byte base area, is (de)allocated by GHC itself. This stack space may
  // be used by LLVM as spill slots for the tail recursive GHC functions. Thus
  // do not allocate stack space here, too.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC) {
    if (MFFrame.getStackSize() > 2048 * sizeof(long)) {
      report_fatal_error(
          "Pre allocated stack space for GHC function is too small");
    }
    if (HasFP) {
      report_fatal_error(
          "In GHC calling convention a frame pointer is not supported");
    }
    MFFrame.setStackSize(MFFrame.getStackSize() + SystemZMC::ELFCallFrameSize);
    return;
  }

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // The current offset of the stack pointer from the CFA.
  int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP;

  if (ZFI->getSpillGPRRegs().LowGPR) {
    // Skip over the GPR saves.
    if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG)
      ++MBBI;
    else
      llvm_unreachable("Couldn't skip over GPR saves");

    // Add CFI for the GPR saves.
    for (auto &Save : CSI) {
      Register Reg = Save.getReg();
      if (SystemZ::GR64BitRegClass.contains(Reg)) {
        int FI = Save.getFrameIdx();
        int64_t Offset = MFFrame.getObjectOffset(FI);
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }

  uint64_t StackSize = MFFrame.getStackSize();
  // We need to allocate the ABI-defined 160-byte base area whenever
  // we allocate stack space for our own use and whenever we call another
  // function.
  bool HasStackObject = false;
  for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i)
    if (!MFFrame.isDeadObjectIndex(i)) {
      HasStackObject = true;
      break;
    }
  if (HasStackObject || MFFrame.hasCalls())
    StackSize += SystemZMC::ELFCallFrameSize;
  // Don't allocate the incoming reg save area.
  StackSize = StackSize > SystemZMC::ELFCallFrameSize
                  ? StackSize - SystemZMC::ELFCallFrameSize
                  : 0;
  MFFrame.setStackSize(StackSize);

  if (StackSize) {
    // Allocate StackSize bytes.
    int64_t Delta = -int64_t(StackSize);
    const unsigned ProbeSize = TLI.getStackProbeSize(MF);
    // The STMG of the GPR save already touched the new frame region if the
    // whole allocation stays within one probe interval, so no explicit
    // probe is needed in that case.
    bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
           (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
    if (!FreeProbe &&
        MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
      // Stack probing may involve looping, but splitting the prologue block
      // is not possible at this point since it would invalidate the
      // SaveBlocks / RestoreBlocks sets of PEI in the single block function
      // case. Build a pseudo to be handled later by inlineStackProbe().
      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
          .addImm(StackSize);
    }
    else {
      bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
      // If we need backchain, save current stack pointer. R1 is free at
      // this point.
      if (StoreBackchain)
        BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR))
            .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);
      emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
      buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
      if (StoreBackchain)
        BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
            .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
            .addImm(getBackchainOffset(MF)).addReg(0);
    }
    SPOffsetFromCFA += Delta;
  }

  if (HasFP) {
    // Copy the base of the frame to R11.
    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D)
        .addReg(SystemZ::R15D);

    // Add CFI for the new frame location.
    buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);

    // Mark the FramePtr as live at the beginning of every block except
    // the entry block. (We'll have marked R11 as live on entry when
    // saving the GPRs.)
    for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF))
      MBBJ.addLiveIn(SystemZ::R11D);
  }

  // Skip over the FPR/VR saves.
  SmallVector<unsigned, 8> CFIIndexes;
  for (auto &Save : CSI) {
    Register Reg = Save.getReg();
    if (SystemZ::FP64BitRegClass.contains(Reg)) {
      if (MBBI != MBB.end() &&
          (MBBI->getOpcode() == SystemZ::STD ||
           MBBI->getOpcode() == SystemZ::STDY))
        ++MBBI;
      else
        llvm_unreachable("Couldn't skip over FPR save");
    } else if (SystemZ::VR128BitRegClass.contains(Reg)) {
      if (MBBI != MBB.end() &&
          MBBI->getOpcode() == SystemZ::VST)
        ++MBBI;
      else
        llvm_unreachable("Couldn't skip over VR save");
    } else
      continue;

    // Add CFI for the this save.
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    Register IgnoredFrameReg;
    int64_t Offset =
        getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg)
            .getFixed();

    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, DwarfReg, SPOffsetFromCFA + Offset));
    CFIIndexes.push_back(CFIIndex);
  }
  // Complete the CFI for the FPR/VR saves, modelling them as taking effect
  // after the last save.
  for (auto CFIIndex : CFIIndexes) {
    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}
700
// Emit the function epilogue before the return in MBB.  When a GPR restore
// range exists, the stack deallocation is folded into the LMG by biasing
// its displacement by StackSize; otherwise the stack pointer is bumped
// explicitly.
void SystemZELFFrameLowering::emitEpilogue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  auto *ZII =
      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  MachineFrameInfo &MFFrame = MF.getFrameInfo();

  // See SystemZELFFrameLowering::emitPrologue
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Skip the return instruction.
  assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");

  uint64_t StackSize = MFFrame.getStackSize();
  if (ZFI->getRestoreGPRRegs().LowGPR) {
    --MBBI;
    unsigned Opcode = MBBI->getOpcode();
    if (Opcode != SystemZ::LMG)
      llvm_unreachable("Expected to see callee-save register restore code");

    // LMG operand layout: 0-1 are the low/high registers, 2-3 the base
    // register and displacement.
    unsigned AddrOpNo = 2;
    DebugLoc DL = MBBI->getDebugLoc();
    uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm();
    unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);

    // If the offset is too large, use the largest stack-aligned offset
    // and add the rest to the base register (the stack or frame pointer).
    // 0x7fff8 is the largest 8-byte-aligned value of a signed 20-bit
    // displacement.
    if (!NewOpcode) {
      uint64_t NumBytes = Offset - 0x7fff8;
      emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(),
                    NumBytes, ZII);
      Offset -= NumBytes;
      NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);
      assert(NewOpcode && "No restore instruction available");
    }

    MBBI->setDesc(ZII->get(NewOpcode));
    MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset);
  } else if (StackSize) {
    DebugLoc DL = MBBI->getDebugLoc();
    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII);
  }
}
746
// Expand the PROBED_STACKALLOC pseudo emitted by the prologue into code that
// allocates the stack in probe-size blocks, touching each block with a
// volatile load so that a stack guard page can never be skipped over.
void SystemZELFFrameLowering::inlineStackProbe(
    MachineFunction &MF, MachineBasicBlock &PrologMBB) const {
  auto *ZII =
      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
  const SystemZTargetLowering &TLI = *STI.getTargetLowering();

  // Find the PROBED_STACKALLOC pseudo; if there is none, nothing to do.
  MachineInstr *StackAllocMI = nullptr;
  for (MachineInstr &MI : PrologMBB)
    if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) {
      StackAllocMI = &MI;
      break;
    }
  if (StackAllocMI == nullptr)
    return;
  // The pseudo's immediate operand carries the total allocation size, which
  // is split into NumFullBlocks probe-sized blocks plus a Residual tail.
  uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
  const unsigned ProbeSize = TLI.getStackProbeSize(MF);
  uint64_t NumFullBlocks = StackSize / ProbeSize;
  uint64_t Residual = StackSize % ProbeSize;
  int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP;
  MachineBasicBlock *MBB = &PrologMBB;
  MachineBasicBlock::iterator MBBI = StackAllocMI;
  const DebugLoc DL = StackAllocMI->getDebugLoc();

  // Allocate a block of Size bytes on the stack and probe it.
  auto allocateAndProbe = [&](MachineBasicBlock &InsMBB,
                              MachineBasicBlock::iterator InsPt, unsigned Size,
                              bool EmitCFI) -> void {
    emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
    if (EmitCFI) {
      SPOffsetFromCFA -= Size;
      buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
    }
    // Probe by means of a volatile compare.
    MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
    BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
      .addReg(SystemZ::R0D, RegState::Undef)
      .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
      .addMemOperand(MMO);
  };

  // With "backchain", keep the incoming SP in R1 so it can be stored into
  // the backchain slot once all allocations are done.
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
  if (StoreBackchain)
    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR))
      .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D);

  MachineBasicBlock *DoneMBB = nullptr;
  MachineBasicBlock *LoopMBB = nullptr;
  if (NumFullBlocks < 3) {
    // Emit unrolled probe statements.
    for (unsigned int i = 0; i < NumFullBlocks; i++)
      allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/);
  } else {
    // Emit a loop probing the pages.
    uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
    SPOffsetFromCFA -= LoopAlloc;

    // Use R0D to hold the exit value. While the loop runs, the CFA is
    // expressed via R0D (the final SP), since R15D changes each iteration.
    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R0D)
      .addReg(SystemZ::R15D);
    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R0D, ZII);
    emitIncrement(*MBB, MBBI, DL, SystemZ::R0D, -int64_t(LoopAlloc), ZII);
    buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::ELFCallFrameSize + LoopAlloc),
                 ZII);

    DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
    LoopMBB = SystemZ::emitBlockAfter(MBB);
    MBB->addSuccessor(LoopMBB);
    LoopMBB->addSuccessor(LoopMBB);
    LoopMBB->addSuccessor(DoneMBB);

    // Loop body: allocate/probe one block, then branch back while R15D is
    // still (unsigned-)greater than the target SP held in R0D.
    MBB = LoopMBB;
    allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
      .addReg(SystemZ::R15D).addReg(SystemZ::R0D);
    BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);

    MBB = DoneMBB;
    MBBI = DoneMBB->begin();
    // After the loop the CFA is based on R15D again.
    buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
  }

  if (Residual)
    allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);

  // Store the saved incoming SP into the (now final) backchain slot.
  if (StoreBackchain)
    BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::STG))
      .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
      .addImm(getBackchainOffset(MF)).addReg(0);

  StackAllocMI->eraseFromParent();
  if (DoneMBB != nullptr) {
    // Compute the live-in lists for the new blocks.
    recomputeLiveIns(*DoneMBB);
    recomputeLiveIns(*LoopMBB);
  }
}
846
hasFP(const MachineFunction & MF) const847 bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const {
848 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
849 MF.getFrameInfo().hasVarSizedObjects());
850 }
851
getFrameIndexReference(const MachineFunction & MF,int FI,Register & FrameReg) const852 StackOffset SystemZELFFrameLowering::getFrameIndexReference(
853 const MachineFunction &MF, int FI, Register &FrameReg) const {
854 // Our incoming SP is actually SystemZMC::ELFCallFrameSize below the CFA, so
855 // add that difference here.
856 StackOffset Offset =
857 TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg);
858 return Offset + StackOffset::getFixed(SystemZMC::ELFCallFrameSize);
859 }
860
getRegSpillOffset(MachineFunction & MF,Register Reg) const861 unsigned SystemZELFFrameLowering::getRegSpillOffset(MachineFunction &MF,
862 Register Reg) const {
863 bool IsVarArg = MF.getFunction().isVarArg();
864 bool BackChain = MF.getFunction().hasFnAttribute("backchain");
865 bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
866 unsigned Offset = RegSpillOffsets[Reg];
867 if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) {
868 if (SystemZ::GR64BitRegClass.contains(Reg))
869 // Put all GPRs at the top of the Register save area with packed
870 // stack. Make room for the backchain if needed.
871 Offset += BackChain ? 24 : 32;
872 else
873 Offset = 0;
874 }
875 return Offset;
876 }
877
getOrCreateFramePointerSaveIndex(MachineFunction & MF) const878 int SystemZELFFrameLowering::getOrCreateFramePointerSaveIndex(
879 MachineFunction &MF) const {
880 SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
881 int FI = ZFI->getFramePointerSaveIndex();
882 if (!FI) {
883 MachineFrameInfo &MFFrame = MF.getFrameInfo();
884 int Offset = getBackchainOffset(MF) - SystemZMC::ELFCallFrameSize;
885 FI = MFFrame.CreateFixedObject(8, Offset, false);
886 ZFI->setFramePointerSaveIndex(FI);
887 }
888 return FI;
889 }
890
usePackedStack(MachineFunction & MF) const891 bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
892 bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
893 bool BackChain = MF.getFunction().hasFnAttribute("backchain");
894 bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
895 if (HasPackedStackAttr && BackChain && !SoftFloat)
896 report_fatal_error("packed-stack + backchain + hard-float is unsupported.");
897 bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
898 return HasPackedStackAttr && CallConv;
899 }
900
SystemZXPLINKFrameLowering()901 SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
902 : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0,
903 Align(32), /* StackRealignable */ false),
904 RegSpillOffsets(-1) {
905
906 // Create a mapping from register number to save slot offset.
907 // These offsets are relative to the start of the local are area.
908 RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
909 for (const auto &Entry : XPLINKSpillOffsetTable)
910 RegSpillOffsets[Entry.Reg] = Entry.Offset;
911 }
912
913 // Checks if the function is a potential candidate for being a XPLeaf routine.
isXPLeafCandidate(const MachineFunction & MF)914 static bool isXPLeafCandidate(const MachineFunction &MF) {
915 const MachineFrameInfo &MFFrame = MF.getFrameInfo();
916 const MachineRegisterInfo &MRI = MF.getRegInfo();
917 const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
918 auto *Regs =
919 static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters());
920
921 // If function calls other functions including alloca, then it is not a XPLeaf
922 // routine.
923 if (MFFrame.hasCalls())
924 return false;
925
926 // If the function has var Sized Objects, then it is not a XPLeaf routine.
927 if (MFFrame.hasVarSizedObjects())
928 return false;
929
930 // If the function adjusts the stack, then it is not a XPLeaf routine.
931 if (MFFrame.adjustsStack())
932 return false;
933
934 // If function modifies the stack pointer register, then it is not a XPLeaf
935 // routine.
936 if (MRI.isPhysRegModified(Regs->getStackPointerRegister()))
937 return false;
938
939 // If function modifies the ADA register, then it is not a XPLeaf routine.
940 if (MRI.isPhysRegModified(Regs->getAddressOfCalleeRegister()))
941 return false;
942
943 // If function modifies the return address register, then it is not a XPLeaf
944 // routine.
945 if (MRI.isPhysRegModified(Regs->getReturnFunctionAddressRegister()))
946 return false;
947
948 // If the backchain pointer should be stored, then it is not a XPLeaf routine.
949 if (MF.getFunction().hasFnAttribute("backchain"))
950 return false;
951
952 // If function acquires its own stack frame, then it is not a XPLeaf routine.
953 // At the time this function is called, only slots for local variables are
954 // allocated, so this is a very rough estimate.
955 if (MFFrame.estimateStackSize(MF) > 0)
956 return false;
957
958 return true;
959 }
960
// Assign frame indices to the callee-saved registers: GPRs with an ABI slot
// go into the fixed register save area, everything else gets an ordinary
// spill stack object. Also records the GPR spill/restore ranges used later
// by the prologue/epilogue inserters.
bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo &MFFrame = MF.getFrameInfo();
  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
  auto &GRRegClass = SystemZ::GR64BitRegClass;

  // At this point, the result of isXPLeafCandidate() is not accurate because
  // the size of the save area has not yet been determined. If
  // isXPLeafCandidate() indicates a potential leaf function, and there are no
  // callee-save registers, then it is indeed a leaf function, and we can early
  // exit.
  // TODO: It is possible for leaf functions to use callee-saved registers.
  // It can use the 0-2k range between R4 and the caller's stack frame without
  // acquiring its own stack frame.
  bool IsLeaf = CSI.empty() && isXPLeafCandidate(MF);
  if (IsLeaf)
    return true;

  // For non-leaf functions:
  // - the address of callee (entry point) register R6 must be saved
  //   (but never restored, hence setRestored(false)).
  CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
  CSI.back().setRestored(false);

  // The return address register R7 must be saved and restored.
  CSI.push_back(CalleeSavedInfo(Regs.getReturnFunctionAddressRegister()));

  // If the function needs a frame pointer, or if the backchain pointer should
  // be stored, then save the stack pointer register R4.
  if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
    CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister()));

  // Scan the call-saved GPRs and find the bounds of the register spill area.
  // Tracked separately for the spill (all GPRs) and restore (restored GPRs
  // only) ranges; both share the same high bound.
  Register LowRestoreGPR = 0;
  int LowRestoreOffset = INT32_MAX;
  Register LowSpillGPR = 0;
  int LowSpillOffset = INT32_MAX;
  Register HighGPR = 0;
  int HighOffset = -1;

  for (auto &CS : CSI) {
    Register Reg = CS.getReg();
    // Offset < 0 means the register has no slot in the ABI save area.
    int Offset = RegSpillOffsets[Reg];
    if (Offset >= 0) {
      if (GRRegClass.contains(Reg)) {
        if (LowSpillOffset > Offset) {
          LowSpillOffset = Offset;
          LowSpillGPR = Reg;
        }
        if (CS.isRestored() && LowRestoreOffset > Offset) {
          LowRestoreOffset = Offset;
          LowRestoreGPR = Reg;
        }

        if (Offset > HighOffset) {
          HighOffset = Offset;
          HighGPR = Reg;
        }
        // Non-volatile GPRs are saved in the dedicated register save area at
        // the bottom of the stack and are not truly part of the "normal" stack
        // frame. Mark the frame index as NoAlloc to indicate it as such.
        unsigned RegSize = 8;
        int FrameIdx = MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
        CS.setFrameIdx(FrameIdx);
        MFFrame.setStackID(FrameIdx, TargetStackID::NoAlloc);
      }
    } else {
      // No ABI slot: create a regular spill object sized/aligned for the
      // register's class.
      Register Reg = CS.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      Align Alignment = TRI->getSpillAlign(*RC);
      unsigned Size = TRI->getSpillSize(*RC);
      Alignment = std::min(Alignment, getStackAlign());
      int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
      CS.setFrameIdx(FrameIdx);
    }
  }

  // Save the range of call-saved registers, for use by the
  // prologue/epilogue inserters.
  if (LowRestoreGPR)
    MFI->setRestoreGPRRegs(LowRestoreGPR, HighGPR, LowRestoreOffset);

  // Save the range of call-saved registers, for use by the epilogue inserter.
  assert(LowSpillGPR && "Expected registers to spill");
  MFI->setSpillGPRRegs(LowSpillGPR, HighGPR, LowSpillOffset);

  return true;
}
1051
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const1052 void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF,
1053 BitVector &SavedRegs,
1054 RegScavenger *RS) const {
1055 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1056
1057 bool HasFP = hasFP(MF);
1058 const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
1059 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1060
1061 // If the function requires a frame pointer, record that the hard
1062 // frame pointer will be clobbered.
1063 if (HasFP)
1064 SavedRegs.set(Regs.getFramePointerRegister());
1065 }
1066
// Emit the XPLINK callee-save spills: a single STMG covering the contiguous
// GPR save range, followed by individual stores for FPR/VR spills.
bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return true;

  MachineFunction &MF = *MBB.getParent();
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
  // The GPR range was computed by assignCalleeSavedSpillSlots().
  SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
  DebugLoc DL;

  // Save GPRs
  if (SpillGPRs.LowGPR) {
    assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
           "Should be saving multiple registers");

    // Build an STM/STMG instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));

    // Add the explicit register operands.
    addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
    addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);

    // Add the address r4
    MIB.addReg(Regs.getStackPointerRegister());

    // Add the partial offset.
    // We cannot add the actual offset yet, as the stack is not finalized;
    // emitPrologue() patches in the final displacement later.
    MIB.addImm(SpillGPRs.GPROffset);

    // Make sure all call-saved GPRs are included as operands and are
    // marked as live on entry.
    auto &GRRegClass = SystemZ::GR64BitRegClass;
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      if (GRRegClass.contains(Reg))
        addSavedGPR(MBB, MIB, Reg, true);
    }
  }

  // Spill FPRs to the stack in the normal TargetInstrInfo way
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    if (SystemZ::FP64BitRegClass.contains(Reg)) {
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::FP64BitRegClass, TRI, Register());
    }
    if (SystemZ::VR128BitRegClass.contains(Reg)) {
      MBB.addLiveIn(Reg);
      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
                               &SystemZ::VR128BitRegClass, TRI, Register());
    }
  }

  return true;
}
1127
// Emit the XPLINK callee-save reloads: individual FPR/VR loads, then a
// single LG or LMG covering the contiguous GPR restore range.
bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();

  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  // Restore FPRs in the normal TargetInstrInfo way.
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    if (SystemZ::FP64BitRegClass.contains(Reg))
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::FP64BitRegClass, TRI, Register());
    if (SystemZ::VR128BitRegClass.contains(Reg))
      TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
                                &SystemZ::VR128BitRegClass, TRI, Register());
  }

  // Restore call-saved GPRs (but not call-clobbered varargs, which at
  // this point might hold return values).
  SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
  if (RestoreGPRs.LowGPR) {
    // The biased displacement must fit the 20-bit signed field of LG/LMG.
    assert(isInt<20>(Regs.getStackPointerBias() + RestoreGPRs.GPROffset));
    if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR)
      // Build an LG/L instruction.
      BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR)
          .addReg(Regs.getStackPointerRegister())
          .addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)
          .addReg(0);
    else {
      // Build an LMG/LM instruction.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));

      // Add the explicit register operands.
      MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
      MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);

      // Add the address.
      MIB.addReg(Regs.getStackPointerRegister());
      MIB.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset);

      // Do a second scan adding regs as being defined by instruction:
      // LMG also reloads every register strictly between Low and High,
      // so mark those as implicit defs.
      for (const CalleeSavedInfo &I : CSI) {
        Register Reg = I.getReg();
        if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
          MIB.addReg(Reg, RegState::ImplicitDefine);
      }
    }
  }

  return true;
}
1188
// Emit the XPLINK prologue: finalize the STMG displacement, allocate the
// stack frame, request a probe for oversized frames, and set up the frame
// pointer if one is needed.
void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
                                              MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
  MachineFrameInfo &MFFrame = MF.getFrameInfo();
  MachineInstr *StoreInstr = nullptr;

  determineFrameLayout(MF);

  bool HasFP = hasFP(MF);
  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;
  uint64_t Offset = 0;

  const uint64_t StackSize = MFFrame.getStackSize();

  if (ZFI->getSpillGPRRegs().LowGPR) {
    // Skip over the GPR saves.
    if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
      const int Operand = 3;
      // Now we can set the offset for the operation, since now the Stack
      // has been finalized.
      Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
      // Maximum displacement for STMG instruction.
      // If the post-allocation offset fits, store relative to the new SP
      // (i.e. after the allocation below); otherwise keep the pre-allocation
      // offset and remember the STMG so the allocation is placed before it.
      if (isInt<20>(Offset - StackSize))
        Offset -= StackSize;
      else
        StoreInstr = &*MBBI;
      MBBI->getOperand(Operand).setImm(Offset);
      ++MBBI;
    } else
      llvm_unreachable("Couldn't skip over GPR saves");
  }

  if (StackSize) {
    // The allocation goes before the STMG when the STMG must use the
    // pre-allocation SP, otherwise after the GPR saves.
    MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI;
    // Allocate StackSize bytes.
    int64_t Delta = -int64_t(StackSize);

    // In case the STM(G) instruction also stores SP (R4), but the displacement
    // is too large, the SP register is manipulated first before storing,
    // resulting in the wrong value stored and retrieved later. In this case, we
    // need to temporarily save the value of SP, and store it later to memory.
    if (StoreInstr && HasFP) {
      // Insert LR r0,r4 before STMG instruction.
      BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::LGR))
          .addReg(SystemZ::R0D, RegState::Define)
          .addReg(SystemZ::R4D);
      // Insert ST r0,xxx(,r4) after STMG instruction.
      BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
          .addReg(SystemZ::R0D, RegState::Kill)
          .addReg(SystemZ::R4D)
          .addImm(Offset)
          .addReg(0);
    }

    emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
                  ZII);

    // If the requested stack size is larger than the guard page, then we need
    // to check if we need to call the stack extender. This requires adding a
    // conditional branch, but splitting the prologue block is not possible at
    // this point since it would invalidate the SaveBlocks / RestoreBlocks sets
    // of PEI in the single block function case. Build a pseudo to be handled
    // later by inlineStackProbe().
    const uint64_t GuardPageSize = 1024 * 1024;
    if (StackSize > GuardPageSize) {
      assert(StoreInstr && "Wrong insertion point");
      BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::XPLINK_STACKALLOC));
    }
  }

  if (HasFP) {
    // Copy the base of the frame to Frame Pointer Register.
    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR),
            Regs.getFramePointerRegister())
        .addReg(Regs.getStackPointerRegister());

    // Mark the FramePtr as live at the beginning of every block except
    // the entry block. (We'll have marked R8 as live on entry when
    // saving the GPRs.)
    for (MachineBasicBlock &B : llvm::drop_begin(MF))
      B.addLiveIn(Regs.getFramePointerRegister());
  }
}
1279
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1280 void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
1281 MachineBasicBlock &MBB) const {
1282 const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
1283 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1284 SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
1285 MachineFrameInfo &MFFrame = MF.getFrameInfo();
1286 auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
1287 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1288
1289 // Skip the return instruction.
1290 assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
1291
1292 uint64_t StackSize = MFFrame.getStackSize();
1293 if (StackSize) {
1294 unsigned SPReg = Regs.getStackPointerRegister();
1295 if (ZFI->getRestoreGPRRegs().LowGPR != SPReg) {
1296 DebugLoc DL = MBBI->getDebugLoc();
1297 emitIncrement(MBB, MBBI, DL, SPReg, StackSize, ZII);
1298 }
1299 }
1300 }
1301
// Emit a compare of the stack pointer against the stack floor, and a call to
// the LE stack extender if needed. This expands the XPLINK_STACKALLOC pseudo
// placed by emitPrologue() for frames larger than the guard page.
void SystemZXPLINKFrameLowering::inlineStackProbe(
    MachineFunction &MF, MachineBasicBlock &PrologMBB) const {
  auto *ZII =
      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());

  // Find the XPLINK_STACKALLOC pseudo; nothing to do if it is absent.
  MachineInstr *StackAllocMI = nullptr;
  for (MachineInstr &MI : PrologMBB)
    if (MI.getOpcode() == SystemZ::XPLINK_STACKALLOC) {
      StackAllocMI = &MI;
      break;
    }
  if (StackAllocMI == nullptr)
    return;

  MachineBasicBlock &MBB = PrologMBB;
  const DebugLoc DL = StackAllocMI->getDebugLoc();

  // The 2nd half of block MBB after split.
  MachineBasicBlock *NextMBB;

  // Add new basic block for the call to the stack overflow function.
  MachineBasicBlock *StackExtMBB =
      MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.push_back(StackExtMBB);

  // Cold path: load the stack-extension routine's address and call it.
  // LG r3,72(,r3)
  BuildMI(StackExtMBB, DL, ZII->get(SystemZ::LG), SystemZ::R3D)
      .addReg(SystemZ::R3D)
      .addImm(72)
      .addReg(0);
  // BASR r3,r3
  BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT))
      .addReg(SystemZ::R3D);

  // Hot path: compare the new SP (r4) against the stack floor and branch to
  // the extension block when it has dipped below.
  // LLGT r3,1208
  BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D)
      .addReg(0)
      .addImm(1208)
      .addReg(0);
  // CG r4,64(,r3)
  BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::CG))
      .addReg(SystemZ::R4D)
      .addReg(SystemZ::R3D)
      .addImm(64)
      .addReg(0);
  // JLL b'0100',F'37'
  BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP)
      .addImm(SystemZ::CCMASK_CMP_LT)
      .addMBB(StackExtMBB);

  NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB);
  MBB.addSuccessor(NextMBB);
  MBB.addSuccessor(StackExtMBB);

  // Add jump back from stack extension BB.
  BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB);
  StackExtMBB->addSuccessor(NextMBB);

  StackAllocMI->eraseFromParent();

  // Compute the live-in lists for the new blocks.
  recomputeLiveIns(*NextMBB);
  recomputeLiveIns(*StackExtMBB);
}
1369
hasFP(const MachineFunction & MF) const1370 bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
1371 return (MF.getFrameInfo().hasVarSizedObjects());
1372 }
1373
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const1374 void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
1375 MachineFunction &MF, RegScavenger *RS) const {
1376 MachineFrameInfo &MFFrame = MF.getFrameInfo();
1377 const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
1378 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1379
1380 // Setup stack frame offset
1381 MFFrame.setOffsetAdjustment(Regs.getStackPointerBias());
1382 }
1383
1384 // Determines the size of the frame, and creates the deferred spill objects.
determineFrameLayout(MachineFunction & MF) const1385 void SystemZXPLINKFrameLowering::determineFrameLayout(
1386 MachineFunction &MF) const {
1387 MachineFrameInfo &MFFrame = MF.getFrameInfo();
1388 const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
1389 auto *Regs =
1390 static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters());
1391
1392 uint64_t StackSize = MFFrame.getStackSize();
1393 if (StackSize == 0)
1394 return;
1395
1396 // Add the size of the register save area and the reserved area to the size.
1397 StackSize += Regs->getCallFrameSize();
1398 MFFrame.setStackSize(StackSize);
1399
1400 // We now know the stack size. Create the fixed spill stack objects for the
1401 // register save area now. This has no impact on the stack frame layout, as
1402 // this is already computed. However, it makes sure that all callee saved
1403 // registers have a valid frame index assigned.
1404 const unsigned RegSize = MF.getDataLayout().getPointerSize();
1405 for (auto &CS : MFFrame.getCalleeSavedInfo()) {
1406 int Offset = RegSpillOffsets[CS.getReg()];
1407 if (Offset >= 0)
1408 CS.setFrameIdx(
1409 MFFrame.CreateFixedSpillStackObject(RegSize, Offset - StackSize));
1410 }
1411 }
1412