1 //=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the table-generated and custom routines for the AArch64
10 // Calling Convention.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64CallingConvention.h"
15 #include "AArch64.h"
16 #include "AArch64InstrInfo.h"
17 #include "AArch64Subtarget.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/TargetInstrInfo.h"
20 #include "llvm/IR/CallingConv.h"
21 using namespace llvm;
22 
23 static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
24                                      AArch64::X3, AArch64::X4, AArch64::X5,
25                                      AArch64::X6, AArch64::X7};
26 static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
27                                      AArch64::H3, AArch64::H4, AArch64::H5,
28                                      AArch64::H6, AArch64::H7};
29 static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
30                                      AArch64::S3, AArch64::S4, AArch64::S5,
31                                      AArch64::S6, AArch64::S7};
32 static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
33                                      AArch64::D3, AArch64::D4, AArch64::D5,
34                                      AArch64::D6, AArch64::D7};
35 static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
36                                      AArch64::Q3, AArch64::Q4, AArch64::Q5,
37                                      AArch64::Q6, AArch64::Q7};
38 static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
39                                      AArch64::Z3, AArch64::Z4, AArch64::Z5,
40                                      AArch64::Z6, AArch64::Z7};
41 
42 static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
43                              MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
44                              CCState &State, Align SlotAlign) {
45   if (LocVT.isScalableVector()) {
46     const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
47         State.getMachineFunction().getSubtarget());
48     const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
49 
50     // We are about to reinvoke the CCAssignFn auto-generated handler. If we
51     // don't unset these flags we will get stuck in an infinite loop forever
52     // invoking the custom handler.
53     ArgFlags.setInConsecutiveRegs(false);
54     ArgFlags.setInConsecutiveRegsLast(false);
55 
56     // The calling convention for passing SVE tuples states that in the event
57     // we cannot allocate enough registers for the tuple we should still leave
58     // any remaining registers unallocated. However, when we call the
59     // CCAssignFn again we want it to behave as if all remaining registers are
60     // allocated. This will force the code to pass the tuple indirectly in
61     // accordance with the PCS.
62     bool RegsAllocated[8];
63     for (int I = 0; I < 8; I++) {
64       RegsAllocated[I] = State.isAllocated(ZRegList[I]);
65       State.AllocateReg(ZRegList[I]);
66     }
67 
68     auto &It = PendingMembers[0];
69     CCAssignFn *AssignFn =
70         TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
71     if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
72                  ArgFlags, State))
73       llvm_unreachable("Call operand has unhandled type");
74 
75     // Return the flags to how they were before.
76     ArgFlags.setInConsecutiveRegs(true);
77     ArgFlags.setInConsecutiveRegsLast(true);
78 
79     // Return the register state back to how it was before, leaving any
80     // unallocated registers available for other smaller types.
81     for (int I = 0; I < 8; I++)
82       if (!RegsAllocated[I])
83         State.DeallocateReg(ZRegList[I]);
84 
85     // All pending members have now been allocated
86     PendingMembers.clear();
87     return true;
88   }
89 
90   unsigned Size = LocVT.getSizeInBits() / 8;
91   const Align StackAlign =
92       State.getMachineFunction().getDataLayout().getStackAlignment();
93   const Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
94   const Align Alignment = std::min(OrigAlign, StackAlign);
95 
96   for (auto &It : PendingMembers) {
97     It.convertToMem(State.AllocateStack(Size, std::max(Alignment, SlotAlign)));
98     State.addLoc(It);
99     SlotAlign = Align(1);
100   }
101 
102   // All pending members have now been allocated
103   PendingMembers.clear();
104   return true;
105 }
106 
107 /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
108 /// [N x Ty] type must still be contiguous in memory though.
109 static bool CC_AArch64_Custom_Stack_Block(
110       unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
111       ISD::ArgFlagsTy &ArgFlags, CCState &State) {
112   SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
113 
114   // Add the argument to the list to be allocated once we know the size of the
115   // block.
116   PendingMembers.push_back(
117       CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
118 
119   if (!ArgFlags.isInConsecutiveRegsLast())
120     return true;
121 
122   return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
123 }
124 
125 /// Given an [N x Ty] block, it should be passed in a consecutive sequence of
126 /// registers. If no such sequence is available, mark the rest of the registers
127 /// of that type as used and place the argument on the stack.
128 static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
129                                     CCValAssign::LocInfo &LocInfo,
130                                     ISD::ArgFlagsTy &ArgFlags, CCState &State) {
131   const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
132       State.getMachineFunction().getSubtarget());
133   bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
134 
135   // Try to allocate a contiguous block of registers, each of the correct
136   // size to hold one member.
137   ArrayRef<MCPhysReg> RegList;
138   if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
139     RegList = XRegList;
140   else if (LocVT.SimpleTy == MVT::f16)
141     RegList = HRegList;
142   else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
143     RegList = SRegList;
144   else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
145     RegList = DRegList;
146   else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
147     RegList = QRegList;
148   else if (LocVT.isScalableVector())
149     RegList = ZRegList;
150   else {
151     // Not an array we want to split up after all.
152     return false;
153   }
154 
155   SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
156 
157   // Add the argument to the list to be allocated once we know the size of the
158   // block.
159   PendingMembers.push_back(
160       CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
161 
162   if (!ArgFlags.isInConsecutiveRegsLast())
163     return true;
164 
165   // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
166   // because that's how the armv7k Clang front-end emits small structs.
167   unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
168   unsigned RegResult = State.AllocateRegBlock(
169       RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
170   if (RegResult && EltsPerReg == 1) {
171     for (auto &It : PendingMembers) {
172       It.convertToReg(RegResult);
173       State.addLoc(It);
174       ++RegResult;
175     }
176     PendingMembers.clear();
177     return true;
178   } else if (RegResult) {
179     assert(EltsPerReg == 2 && "unexpected ABI");
180     bool UseHigh = false;
181     CCValAssign::LocInfo Info;
182     for (auto &It : PendingMembers) {
183       Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
184       State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
185                                        MVT::i64, Info));
186       UseHigh = !UseHigh;
187       if (!UseHigh)
188         ++RegResult;
189     }
190     PendingMembers.clear();
191     return true;
192   }
193 
194   if (!LocVT.isScalableVector()) {
195     // Mark all regs in the class as unavailable
196     for (auto Reg : RegList)
197       State.AllocateReg(Reg);
198   }
199 
200   const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8);
201 
202   return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
203 }
204 
205 // TableGen provides definitions of the calling convention analysis entry
206 // points.
207 #include "AArch64GenCallingConv.inc"
208