//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"

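// The widest wavefront has 64 lanes (wave64).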
#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch())) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects || ST.enableFlatScratch())
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

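/// Clamp the tracked occupancy against both the function's waves-per-EU limit
/// and the occupancy achievable with its LDS usage.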
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

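/// Reserve the next four user SGPRs for the 128-bit private segment buffer
/// resource descriptor and record them in ArgInfo.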
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

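/// Reserve two user SGPRs for the 64-bit pointer to the dispatch packet.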
Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

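/// Reserve two user SGPRs for the 64-bit pointer to the dispatching queue.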
Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

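/// Reserve two user SGPRs for the 64-bit kernarg segment pointer.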
Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

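/// Reserve two user SGPRs for the 64-bit dispatch ID.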
Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

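/// Reserve two user SGPRs for the flat scratch initialization value.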
Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(
    TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0,
                            &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

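/// Reserve two user SGPRs for the implicit buffer pointer used by Mesa GFX
/// shaders.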
Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(
    TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0,
                            &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

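/// Check whether \p Reg appears in the null-terminated callee-saved register
/// list \p CSRegs.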
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed spill lanes are available in VGPRs already
/// used for SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

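  // Each spilled SGPR occupies one 32-bit lane of a VGPR, so a spill object of
  // Size bytes needs Size / 4 lanes.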
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Pick the VGPR for this lane: reuse the VGPR reserved via
    // VGPRReservedForSGPRSpill while it still has free lanes; otherwise
    // reserve a fresh VGPR whenever a VGPR boundary is crossed
    // (NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ...).
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs.
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  if (LaneVGPR == Register())
    return false;
  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

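  // Each 32-bit slot of the spill object is assigned its own AGPR or VGPR.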
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

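/// Return the SGPR carrying the low half of the GIT (global information
/// table) pointer on amdpal, or no register on other OS types.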
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}


static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

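/// Convert \p ArgInfo into its YAML representation, or return None if no
/// argument is in use.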
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  return false;
}

// Remove the VGPR that was reserved for SGPR spills if there are no spilled
// SGPRs.
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *I = SpillVGPRs.begin(); I != SpillVGPRs.end(); ++I) {
    if (I->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(I);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}
