//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

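// Derive the initial ABI state for this function: which preloaded kernel
// arguments and system SGPR/VGPR inputs are needed, based on the calling
// convention, function attributes, and subtarget.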
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           CC != CallingConv::AMDGPU_Gfx &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    if (UseFixedABI)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are still required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

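// Clamp the cached occupancy to both the wave limit for this function and the
// occupancy achievable with its LDS usage.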
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

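// Each of the add* helpers below claims the next unallocated user SGPRs for
// one preloaded ABI value, records the resulting register in ArgInfo, and
// advances NumUserSGPRs accordingly (4 for the 128-bit private segment buffer
// descriptor, 2 for each 64-bit pointer or ID).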
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

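/// Return true if \p Reg appears in the null-terminated list of callee-saved
/// registers \p CSRegs.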
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed spill lanes are available in VGPRs already used
/// for SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ...)
    // when one of the two conditions is true:
    // 1. The reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    //    used for spilling.
    // 2. All spill lanes of the reserved VGPR(s) are full and another spill
    //    lane is required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so the VGPR must always be saved,
      // even if it is a caller-saved register.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  if (LaneVGPR == Register())
    return false;
  SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

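// Frame indices for SGPR spills that were mapped to VGPR lanes, and VGPR
// spills that were fully mapped to AGPRs, no longer need stack slots; remove
// them and return the remaining objects to the default stack.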
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

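// Lazily create (and cache) an emergency spill slot for the register
// scavenger. Entry functions use a fixed object at offset 0; other functions
// use an ordinary spill-sized stack object.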
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

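/// Return the next unallocated user SGPR; only valid before any system SGPRs
/// have been allocated.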
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

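/// Return the next unallocated system SGPR, following all user SGPRs.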
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

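// For AMDPAL, return the SGPR carrying the low 32 bits of the GIT address:
// s8 for merged HS/GS shaders on subtargets with merged shaders, s0 otherwise.
// Returns no register for non-PAL OSes.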
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

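/// Print \p Reg into a YAML string value using the target's register names.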
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

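/// Convert the in-memory argument descriptors to their YAML (MIR) form.
/// Returns None if no argument has been assigned a register or stack location.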
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

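// Snapshot the serializable parts of the in-memory function info for MIR
// printing.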
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

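// Restore the fields parsed from MIR YAML into the in-memory function info.
// Returns true and fills \p Error / \p SourceRange if the serialized scavenge
// frame index cannot be resolved.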
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}

// Remove the VGPR reserved for SGPR spills if there are no spilled SGPRs.
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *I = SpillVGPRs.begin(); I != SpillVGPRs.end(); ++I) {
    if (I->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(I);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}