//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

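// Initialize per-function state from the IR function: which preloaded kernel
// arguments (user and system SGPRs) will be needed, which registers hold the
// scratch resource descriptor, frame and stack pointers, and the initial
// occupancy estimate derived from subtarget and function attributes.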
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects)
      FlatScratchInit = true;
  }

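  // These attributes carry their values as integer strings parsed with
  // consumeInteger, e.g. "amdgpu-git-ptr-high"="16" (the value here is only an
  // illustration).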
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

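// Tighten the occupancy estimate using the wave limit for this function and
// its LDS (local memory) usage.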
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

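// The add* helpers below hand out the next unallocated user SGPRs to a
// preloaded kernel argument and record the resulting register in ArgInfo.
// The private segment buffer occupies an SGPR quadruple; each pointer-sized
// argument occupies an SGPR pair.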
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

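// Linear scan over the null-terminated callee-saved register list.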
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed spill lanes are available in the VGPRs already
/// used for SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

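  // Each spilled SGPR occupies one 32-bit lane of a VGPR, so a spill object of
  // Size bytes needs Size / 4 lanes.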
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ...)
    // when one of the two conditions is true:
    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    // reserved.
    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    // required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

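  // Walk the register class in order and assign each lane the first register
  // that is allocatable, unused in the function, and not already claimed above.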
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

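// Drop the frame indices of spills that were fully lowered into VGPR or AGPR
// lanes so that no scratch memory is allocated for them.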
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

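// User SGPRs are handed out consecutively starting at SGPR0.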
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

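// System SGPRs follow immediately after the user SGPRs.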
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

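// Return the SGPR holding the low 32 bits of the GIT (global information
// table) pointer on amdpal, or no register for other OSes.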
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

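// Print a register into a YAML string value using the target's register names.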
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

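// Convert the in-memory argument descriptors to their serializable YAML form.
// Returns None when no argument was ever assigned so the whole field can be
// omitted from the output.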
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

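// Construct the serializable (MIR YAML) mirror of the in-memory
// SIMachineFunctionInfo.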
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

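// Read the parsed MIR YAML fields back into the in-memory info. Always returns
// false (no error).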
bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}

// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    if (i->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(i);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}