//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
    : CallLowering(&TLI) {}

static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
                                             MVT &LocVT) {
  // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
  // hack because the DAG calls the assignment function with pre-legalized
  // register typed values, not the raw type.
  //
  // This hack is not applied to return values which are not passed on the
  // stack.
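  //
  // For example, a stack-passed i1 argument ends up occupying an i8 stack
  // slot: AAPCS requires the caller to zero-extend i1 to 8 bits (see the
  // setZExt in lowerCall below), so both ValVT and LocVT become i8 here.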
  if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
    ValVT = LocVT = MVT::i8;
  else if (OrigVT == MVT::i16)
    ValVT = LocVT = MVT::i16;
}

// Account for i1/i8/i16 stack passed value hack
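// i.e. use the original i8/i16 value type for the stack access rather than
// the (possibly widened) location type reported in the CCValAssign.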
static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
  const MVT ValVT = VA.getValVT();
  return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
                                                 : LLT(VA.getLocVT());
}

namespace {

struct AArch64IncomingValueAssigner
    : public CallLowering::IncomingValueAssigner {
  AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_)
      : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
    return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
                                            LocInfo, Info, Flags, State);
  }
};

struct AArch64OutgoingValueAssigner
    : public CallLowering::OutgoingValueAssigner {
  const AArch64Subtarget &Subtarget;

  /// Track if this is used for a return instead of function argument
  /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
  /// stack passed returns for them and cannot apply the type adjustment.
  bool IsReturn;

  AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_,
                               const AArch64Subtarget &Subtarget_,
                               bool IsReturn)
      : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
        Subtarget(Subtarget_), IsReturn(IsReturn) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
    bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
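    // On Win64, variadic callees take even their fixed arguments per the
    // vararg convention (which differs, e.g. in how FP arguments are passed),
    // so such arguments are routed through AssignFnVarArg below.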

    if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
      applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);

    bool Res;
    if (Info.IsFixed && !UseVarArgsCCForFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackOffset = State.getNextStackOffset();
    return Res;
  }
};

struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingValueHandler(MIRBuilder, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();

    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
    return AddrReg.getReg(0);
  }

  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    // For pointers, we just need to fixup the integer types reported in the
    // CCValAssign.
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);
    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    LLT ValTy(VA.getValVT());
    LLT LocTy(VA.getLocVT());

    // Fixup the types for the DAG compatibility hack.
    if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
      std::swap(ValTy, LocTy);
    else {
      // The calling code knows if this is a pointer or not, we're only touching
      // the LocTy for the i8/i16 hack.
      assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
      LocTy = MemTy;
    }

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
};

struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingArgHandler(MIRBuilder, MRI) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

/// A special return arg handler for "returned" attribute arg calls.
struct ReturnedArgCallReturnHandler : public CallReturnHandler {
  ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
                               MachineRegisterInfo &MRI,
                               MachineInstrBuilder MIB)
      : CallReturnHandler(MIRBuilder, MRI, MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {}
};

struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, bool IsTailCall = false,
                     int FPDiff = 0)
      : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
        FPDiff(FPDiff),
        Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

    if (IsTailCall) {
      assert(!Flags.isByVal() && "byval unhandled with tail calls");

      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    if (!SPReg)
      SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  /// We need to fixup the reported store size for certain value types because
  /// we invert the interpretation of ValVT and LocVT in certain cases. This is
  /// for compatibility with the DAG call lowering implementation, which we're
  /// currently building on top of.
  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
                                       inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
                            Register Addr, LLT MemTy, MachinePointerInfo &MPO,
                            CCValAssign &VA) override {
    unsigned MaxSize = MemTy.getSizeInBytes() * 8;
    // For varargs, we always want to extend them to 8 bytes, in which case
    // we disable setting a max.
    if (!Arg.IsFixed)
      MaxSize = 0;

    Register ValVReg = Arg.Regs[RegIndex];
    if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
      MVT LocVT = VA.getLocVT();
      MVT ValVT = VA.getValVT();

      if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
        std::swap(ValVT, LocVT);
        MemTy = LLT(VA.getValVT());
      }

      ValVReg = extendRegister(ValVReg, VA, MaxSize);
    } else {
      // The store does not cover the full allocated stack slot.
      MemTy = LLT(VA.getValVT());
    }

    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }

  MachineInstrBuilder MIB;

  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  const AArch64Subtarget &Subtarget;
};
} // namespace

static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return (CallConv == CallingConv::Fast && TailCallOpt) ||
         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
}

bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      FunctionLoweringInfo &FLI,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();
    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
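      // i.e. emit a G_ZEXT to s8 so the returned byte is exactly 0 or 1.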
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                 1) {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                             Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                                  Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but can easily be done in future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
              return false;
            }
          } else {
            // If the split EVT was a <1 x T> vector, and NewVT is T, then we
            // don't have to do anything since we don't distinguish between the
            // two.
            if (NewLLT != MRI.getType(CurVReg)) {
              // A scalar extend.
              CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                            .getReg(0);
            }
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
    }

    AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
                                          /*IsReturn*/ true);
    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
                                            MIRBuilder, CC, F.isVarArg());
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}

bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
  auto &F = MF.getFunction();
  if (isa<ScalableVectorType>(F.getReturnType()))
    return true;
  if (llvm::any_of(F.args(), [](const Argument &A) {
        return isa<ScalableVectorType>(A.getType());
      }))
    return true;
  const auto &ST = MF.getSubtarget<AArch64Subtarget>();
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
    return true;
  }
  return false;
}

bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()).isZero())
      continue;

    ArgInfo OrigArg{VRegs[i], Arg, i};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    if (Arg.hasAttribute(Attribute::SwiftAsync))
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
  FormalArgHandler Handler(MIRBuilder, MRI);
  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
                                     F.getCallingConv(), F.isVarArg()))
    return false;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Assigner.StackOffset;
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset =
        alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);

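    // The fixed object created below marks where the on-stack varargs begin;
    // Darwin's va_start lowering points at this offset.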
    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
  case CallingConv::Fast:
    return true;
  default:
    return false;
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
/// CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                              CalleeAssignFnVarArg);
  AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                              CallerAssignFnVarArg);

  if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}

bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  LLVMContext &Ctx = CallerF.getContext();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);

  AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                              Subtarget, /*IsReturn*/ false);
  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.IsVarArg) {
    // Be conservative and disallow variadic memory operands to match SDAG's
    // behaviour.
    // FIXME: If the caller's calling convention is C, then we can
    // potentially use its argument area. However, for cases like fastcc,
    // we can't do anything.
    for (unsigned i = 0; i < OutLocs.size(); ++i) {
      auto &ArgLoc = OutLocs[i];
      if (ArgLoc.isRegLoc())
        continue;

      LLVM_DEBUG(
          dbgs()
          << "... Cannot tail call vararg function with stack arguments\n");
      return false;
    }
  }

  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}

bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible (see
  // X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move it into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(
      dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
  if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}

static const uint32_t *
getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
               AArch64CallLowering::CallLoweringInfo &Info,
               const AArch64RegisterInfo &TRI, MachineFunction &MF) {
  const uint32_t *Mask;
  if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
    // For 'this' returns, use the X0-preserving mask if applicable
    Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
    if (!Mask) {
      OutArgs[0].Flags[0].setReturned(false);
      Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
    }
  } else {
    Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
  }
  return Mask;
}

bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
                   Info.CallConv != CallingConv::Tail &&
                   Info.CallConv != CallingConv::SwiftTail;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (Subtarget.hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;
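    // For example, if the caller reserved 16 bytes of incoming argument space
    // (NumReusableBytes) but this callee needs 32 bytes of argument stack
    // (NumBytes), then FPDiff = 16 - 32 = -16, and the extra 16 bytes are
    // recorded below via setTailCallReservedStack.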

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started at
    // a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call for
    // the forwarded registers that we *aren't* passing as parameters. This will
    // preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), Info.Callee, 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1))
      OutArgs.back().Flags[0].setZExt();
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             Info.Callee, 0);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (!Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? OutArgs[0].Regs[0] : Register()))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

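  // For example, a fastcc call compiled with -tailcallopt uses a convention
  // where the callee pops its own argument area, so CalleePopBytes below tells
  // ADJCALLSTACKUP how many (16-byte aligned) bytes the callee will pop.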
  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackOffset, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackOffset)
      .addImm(CalleePopBytes);

  return true;
}

bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}
1159