1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64RegisterBankInfo.h"
15 #include "AArch64InstrInfo.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
19 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Utils.h"
21 #include "llvm/CodeGen/LowLevelType.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineOperand.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/TargetOpcodes.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/IR/IntrinsicsAArch64.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include <algorithm>
32 #include <cassert>
33 
34 #define GET_TARGET_REGBANK_IMPL
35 #include "AArch64GenRegisterBank.inc"
36 
37 // This file will be TableGen'ed at some point.
38 #include "AArch64GenRegisterBankInfo.def"
39 
40 using namespace llvm;
41 
/// Construct the AArch64 register-bank info.
///
/// The constructor's only job (until this is TableGen'ed) is to run, exactly
/// once per process, a battery of assertions verifying that the generated
/// tables in AArch64GenRegisterBankInfo.def line up with the PartialMappingIdx
/// enum and the register banks declared in AArch64GenRegisterBank.inc. In
/// NDEBUG builds all of this compiles away to nothing.
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
    : AArch64GenRegisterBankInfo() {
  static llvm::once_flag InitializeRegisterBankFlag;

  // NOTE(review): this lambda is `static` yet captures `this` and `TRI` by
  // reference. That is only safe because llvm::call_once runs it during the
  // very first construction, while those references are still valid — confirm
  // this stays true if the initialization strategy ever changes.
  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    // Each bank must sit at the index its *RegBankID names; getRegBank does a
    // positional lookup, so a reordered table would silently mis-map banks.
    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.getSize() == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");

    // Check that the TableGen'ed like file is in sync with our expectations.
    // First, the Idx: the PMI_* enumerators must be contiguous and in this
    // exact order, since getRegBankBaseIdxOffset does arithmetic on them.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping: each PMI_* entry must describe the expected
// (start bit, length, bank) triple.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping: the single-operand ValueMapping at offset `Offset`
// from the bank's first entry must point back at the right partial mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operands instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

// Check the two-operand copy mappings (Map[0] = Dst, Map[1] = Src) returned
// by getCopyMapping for same-bank and cross-bank copies.
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);  \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

// Check the FPR-to-FPR extension mappings (Map[0] = Dst, Map[1] = Src)
// returned by getFPExtMapping for each supported (DstSize, SrcSize) pair.
#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
212 
213 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
214                                            const RegisterBank &B,
215                                            unsigned Size) const {
216   // What do we do with different size?
217   // copy are same size.
218   // Will introduce other hooks for different size:
219   // * extract cost.
220   // * build_sequence cost.
221 
222   // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
223   // FIXME: This should be deduced from the scheduling model.
224   if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
225     // FMOVXDr or FMOVWSr.
226     return 5;
227   if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
228     // FMOVDXr or FMOVSWr.
229     return 4;
230 
231   return RegisterBankInfo::copyCost(A, B, Size);
232 }
233 
234 const RegisterBank &
235 AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
236                                                 LLT) const {
237   switch (RC.getID()) {
238   case AArch64::FPR8RegClassID:
239   case AArch64::FPR16RegClassID:
240   case AArch64::FPR16_loRegClassID:
241   case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
242   case AArch64::FPR32RegClassID:
243   case AArch64::FPR64RegClassID:
244   case AArch64::FPR64_loRegClassID:
245   case AArch64::FPR128RegClassID:
246   case AArch64::FPR128_loRegClassID:
247   case AArch64::DDRegClassID:
248   case AArch64::DDDRegClassID:
249   case AArch64::DDDDRegClassID:
250   case AArch64::QQRegClassID:
251   case AArch64::QQQRegClassID:
252   case AArch64::QQQQRegClassID:
253     return getRegBank(AArch64::FPRRegBankID);
254   case AArch64::GPR32commonRegClassID:
255   case AArch64::GPR32RegClassID:
256   case AArch64::GPR32spRegClassID:
257   case AArch64::GPR32sponlyRegClassID:
258   case AArch64::GPR32argRegClassID:
259   case AArch64::GPR32allRegClassID:
260   case AArch64::GPR64commonRegClassID:
261   case AArch64::GPR64RegClassID:
262   case AArch64::GPR64spRegClassID:
263   case AArch64::GPR64sponlyRegClassID:
264   case AArch64::GPR64argRegClassID:
265   case AArch64::GPR64allRegClassID:
266   case AArch64::GPR64noipRegClassID:
267   case AArch64::GPR64common_and_GPR64noipRegClassID:
268   case AArch64::GPR64noip_and_tcGPR64RegClassID:
269   case AArch64::tcGPR64RegClassID:
270   case AArch64::rtcGPR64RegClassID:
271   case AArch64::WSeqPairsClassRegClassID:
272   case AArch64::XSeqPairsClassRegClassID:
273   case AArch64::MatrixIndexGPR32_12_15RegClassID:
274     return getRegBank(AArch64::GPRRegBankID);
275   case AArch64::CCRRegClassID:
276     return getRegBank(AArch64::CCRegBankID);
277   default:
278     llvm_unreachable("Register class not supported");
279   }
280 }
281 
/// Produce the set of alternative operand mappings RegBankSelect may choose
/// from (in greedy mode) for \p MI, beyond the default mapping.
///
/// Alternatives are only offered for a few opcodes where both a GPR and an
/// FPR placement are plausible: G_OR, G_BITCAST and (64-bit) G_LOAD.
/// Everything else falls through to the generic implementation.
RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit or can be mapped on either FPR or
    // GPR for the same cost.
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    // Four candidates: keep both sides in one bank (free), or pay the FMOV
    // cost to cross banks in either direction.
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    // NOTE(review): this reuses ID 3 rather than 4, and the same copyCost
    // direction as above. applyMappingImpl only checks ID <= 4 and the IDs
    // are not used as unique keys, so this appears benign — but confirm the
    // duplication is intentional before relying on IDs to distinguish them.
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    // The loaded value may land in either bank; the address operand is
    // always a 64-bit GPR.
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
383 
384 void AArch64RegisterBankInfo::applyMappingImpl(
385     const OperandsMapper &OpdMapper) const {
386   switch (OpdMapper.getMI().getOpcode()) {
387   case TargetOpcode::G_OR:
388   case TargetOpcode::G_BITCAST:
389   case TargetOpcode::G_LOAD:
390     // Those ID must match getInstrAlternativeMappings.
391     assert((OpdMapper.getInstrMapping().getID() >= 1 &&
392             OpdMapper.getInstrMapping().getID() <= 4) &&
393            "Don't know how to handle that ID");
394     return applyDefaultMapping(OpdMapper);
395   default:
396     llvm_unreachable("Don't know how to handle that operation");
397   }
398 }
399 
400 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
401 /// having only floating-point operands.
402 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
403   switch (Opc) {
404   case TargetOpcode::G_FADD:
405   case TargetOpcode::G_FSUB:
406   case TargetOpcode::G_FMUL:
407   case TargetOpcode::G_FMA:
408   case TargetOpcode::G_FDIV:
409   case TargetOpcode::G_FCONSTANT:
410   case TargetOpcode::G_FPEXT:
411   case TargetOpcode::G_FPTRUNC:
412   case TargetOpcode::G_FCEIL:
413   case TargetOpcode::G_FFLOOR:
414   case TargetOpcode::G_FNEARBYINT:
415   case TargetOpcode::G_FNEG:
416   case TargetOpcode::G_FCOS:
417   case TargetOpcode::G_FSIN:
418   case TargetOpcode::G_FLOG10:
419   case TargetOpcode::G_FLOG:
420   case TargetOpcode::G_FLOG2:
421   case TargetOpcode::G_FSQRT:
422   case TargetOpcode::G_FABS:
423   case TargetOpcode::G_FEXP:
424   case TargetOpcode::G_FRINT:
425   case TargetOpcode::G_INTRINSIC_TRUNC:
426   case TargetOpcode::G_INTRINSIC_ROUND:
427     return true;
428   }
429   return false;
430 }
431 
/// Build a mapping where every operand of \p MI (up to 3) uses the same
/// value mapping: all-FPR when the result is a vector or the opcode is a
/// floating-point operation, all-GPR otherwise.
///
/// In asserts builds, verifies that each source operand would pick the same
/// bank and size-bucket as the destination.
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or less operands");

  // Bank choice is driven entirely by the destination's type and the opcode.
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of details, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each types.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  // Cost 1: same-bank operations carry no cross-bank copy penalty.
  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}
474 
475 /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
476 static bool isFPIntrinsic(unsigned ID) {
477   // TODO: Add more intrinsics.
478   switch (ID) {
479   default:
480     return false;
481   case Intrinsic::aarch64_neon_uaddlv:
482     return true;
483   }
484 }
485 
/// \returns true if \p MI is known (or can be inferred) to produce/consume
/// floating-point values and should therefore be constrained to the FPR bank.
///
/// For copy-like instructions (COPY, PHI, optimization hints) with no bank
/// assigned yet, PHIs are searched recursively through their inputs, bounded
/// by \p Depth against MaxFPRSearchDepth.
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  // FP-only intrinsics are FPR-constrained by definition.
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned a FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  // Any FP-producing input is enough to treat the whole PHI as FP.
  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}
523 
524 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
525                                          const MachineRegisterInfo &MRI,
526                                          const TargetRegisterInfo &TRI,
527                                          unsigned Depth) const {
528   switch (MI.getOpcode()) {
529   case TargetOpcode::G_FPTOSI:
530   case TargetOpcode::G_FPTOUI:
531   case TargetOpcode::G_FCMP:
532     return true;
533   default:
534     break;
535   }
536   return hasFPConstraints(MI, MRI, TRI, Depth);
537 }
538 
539 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
540                                             const MachineRegisterInfo &MRI,
541                                             const TargetRegisterInfo &TRI,
542                                             unsigned Depth) const {
543   switch (MI.getOpcode()) {
544   case AArch64::G_DUP:
545   case TargetOpcode::G_SITOFP:
546   case TargetOpcode::G_UITOFP:
547   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
548   case TargetOpcode::G_INSERT_VECTOR_ELT:
549   case TargetOpcode::G_BUILD_VECTOR:
550   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
551     return true;
552   default:
553     break;
554   }
555   return hasFPConstraints(MI, MRI, TRI, Depth);
556 }
557 
558 const RegisterBankInfo::InstructionMapping &
559 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
560   const unsigned Opc = MI.getOpcode();
561 
562   // Try the default logic for non-generic instructions that are either copies
563   // or already have some operands assigned to banks.
564   if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
565       Opc == TargetOpcode::G_PHI) {
566     const RegisterBankInfo::InstructionMapping &Mapping =
567         getInstrMappingImpl(MI);
568     if (Mapping.isValid())
569       return Mapping;
570   }
571 
572   const MachineFunction &MF = *MI.getParent()->getParent();
573   const MachineRegisterInfo &MRI = MF.getRegInfo();
574   const TargetSubtargetInfo &STI = MF.getSubtarget();
575   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
576 
577   switch (Opc) {
578     // G_{F|S|U}REM are not listed because they are not legal.
579     // Arithmetic ops.
580   case TargetOpcode::G_ADD:
581   case TargetOpcode::G_SUB:
582   case TargetOpcode::G_PTR_ADD:
583   case TargetOpcode::G_MUL:
584   case TargetOpcode::G_SDIV:
585   case TargetOpcode::G_UDIV:
586     // Bitwise ops.
587   case TargetOpcode::G_AND:
588   case TargetOpcode::G_OR:
589   case TargetOpcode::G_XOR:
590     // Floating point ops.
591   case TargetOpcode::G_FADD:
592   case TargetOpcode::G_FSUB:
593   case TargetOpcode::G_FMUL:
594   case TargetOpcode::G_FDIV:
595     return getSameKindOfOperandsMapping(MI);
596   case TargetOpcode::G_FPEXT: {
597     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
598     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
599     return getInstructionMapping(
600         DefaultMappingID, /*Cost*/ 1,
601         getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
602         /*NumOperands*/ 2);
603   }
604     // Shifts.
605   case TargetOpcode::G_SHL:
606   case TargetOpcode::G_LSHR:
607   case TargetOpcode::G_ASHR: {
608     LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
609     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
610     if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
611       return getInstructionMapping(DefaultMappingID, 1,
612                                    &ValMappings[Shift64Imm], 3);
613     return getSameKindOfOperandsMapping(MI);
614   }
615   case TargetOpcode::COPY: {
616     Register DstReg = MI.getOperand(0).getReg();
617     Register SrcReg = MI.getOperand(1).getReg();
618     // Check if one of the register is not a generic register.
619     if ((Register::isPhysicalRegister(DstReg) ||
620          !MRI.getType(DstReg).isValid()) ||
621         (Register::isPhysicalRegister(SrcReg) ||
622          !MRI.getType(SrcReg).isValid())) {
623       const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
624       const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
625       if (!DstRB)
626         DstRB = SrcRB;
627       else if (!SrcRB)
628         SrcRB = DstRB;
629       // If both RB are null that means both registers are generic.
630       // We shouldn't be here.
631       assert(DstRB && SrcRB && "Both RegBank were nullptr");
632       unsigned Size = getSizeInBits(DstReg, MRI, TRI);
633       return getInstructionMapping(
634           DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
635           getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
636           // We only care about the mapping of the destination.
637           /*NumOperands*/ 1);
638     }
639     // Both registers are generic, use G_BITCAST.
640     LLVM_FALLTHROUGH;
641   }
642   case TargetOpcode::G_BITCAST: {
643     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
644     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
645     unsigned Size = DstTy.getSizeInBits();
646     bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
647     bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
648     const RegisterBank &DstRB =
649         DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
650     const RegisterBank &SrcRB =
651         SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
652     return getInstructionMapping(
653         DefaultMappingID, copyCost(DstRB, SrcRB, Size),
654         getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
655         // We only care about the mapping of the destination for COPY.
656         /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
657   }
658   default:
659     break;
660   }
661 
662   unsigned NumOperands = MI.getNumOperands();
663 
664   // Track the size and bank of each register.  We don't do partial mappings.
665   SmallVector<unsigned, 4> OpSize(NumOperands);
666   SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
667   for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
668     auto &MO = MI.getOperand(Idx);
669     if (!MO.isReg() || !MO.getReg())
670       continue;
671 
672     LLT Ty = MRI.getType(MO.getReg());
673     OpSize[Idx] = Ty.getSizeInBits();
674 
675     // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
676     // For floating-point instructions, scalars go in FPRs.
677     if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
678         Ty.getSizeInBits() > 64)
679       OpRegBankIdx[Idx] = PMI_FirstFPR;
680     else
681       OpRegBankIdx[Idx] = PMI_FirstGPR;
682   }
683 
684   unsigned Cost = 1;
685   // Some of the floating-point instructions have mixed GPR and FPR operands:
686   // fine-tune the computed mapping.
687   switch (Opc) {
688   case AArch64::G_DUP: {
689     Register ScalarReg = MI.getOperand(1).getReg();
690     LLT ScalarTy = MRI.getType(ScalarReg);
691     auto ScalarDef = MRI.getVRegDef(ScalarReg);
692     // s8 is an exception for G_DUP, which we always want on gpr.
693     if (ScalarTy.getSizeInBits() != 8 &&
694         (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
695          onlyDefinesFP(*ScalarDef, MRI, TRI)))
696       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
697     else
698       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
699     break;
700   }
701   case TargetOpcode::G_TRUNC: {
702     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
703     if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
704       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
705     break;
706   }
707   case TargetOpcode::G_SITOFP:
708   case TargetOpcode::G_UITOFP: {
709     if (MRI.getType(MI.getOperand(0).getReg()).isVector())
710       break;
711     // Integer to FP conversions don't necessarily happen between GPR -> FPR
712     // regbanks. They can also be done within an FPR register.
713     Register SrcReg = MI.getOperand(1).getReg();
714     if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
715       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
716     else
717       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
718     break;
719   }
720   case TargetOpcode::G_FPTOSI:
721   case TargetOpcode::G_FPTOUI:
722     if (MRI.getType(MI.getOperand(0).getReg()).isVector())
723       break;
724     OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
725     break;
726   case TargetOpcode::G_FCMP: {
727     // If the result is a vector, it must use a FPR.
728     AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
729         MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
730                                                           : PMI_FirstGPR;
731     OpRegBankIdx = {Idx0,
732                     /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
733     break;
734   }
735   case TargetOpcode::G_BITCAST:
736     // This is going to be a cross register bank copy and this is expensive.
737     if (OpRegBankIdx[0] != OpRegBankIdx[1])
738       Cost = copyCost(
739           *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
740           *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
741           OpSize[0]);
742     break;
743   case TargetOpcode::G_LOAD:
744     // Loading in vector unit is slightly more expensive.
745     // This is actually only true for the LD1R and co instructions,
746     // but anyway for the fast mode this number does not matter and
747     // for the greedy mode the cost of the cross bank copy will
748     // offset this number.
749     // FIXME: Should be derived from the scheduling model.
750     if (OpRegBankIdx[0] != PMI_FirstGPR)
751       Cost = 2;
752     else
753       // Check if that load feeds fp instructions.
754       // In that case, we want the default mapping to be on FPR
755       // instead of blind map every scalar to GPR.
756       for (const MachineInstr &UseMI :
757            MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
758         // If we have at least one direct use in a FP instruction,
759         // assume this was a floating point load in the IR.
760         // If it was not, we would have had a bitcast before
761         // reaching that instruction.
762         // Int->FP conversion operations are also captured in onlyDefinesFP().
763         if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
764           OpRegBankIdx[0] = PMI_FirstFPR;
765           break;
766         }
767       }
768     break;
769   case TargetOpcode::G_STORE:
770     // Check if that store is fed by fp instructions.
771     if (OpRegBankIdx[0] == PMI_FirstGPR) {
772       Register VReg = MI.getOperand(0).getReg();
773       if (!VReg)
774         break;
775       MachineInstr *DefMI = MRI.getVRegDef(VReg);
776       if (onlyDefinesFP(*DefMI, MRI, TRI))
777         OpRegBankIdx[0] = PMI_FirstFPR;
778       break;
779     }
780     break;
781   case TargetOpcode::G_SELECT: {
782     // If the destination is FPR, preserve that.
783     if (OpRegBankIdx[0] != PMI_FirstGPR)
784       break;
785 
786     // If we're taking in vectors, we have no choice but to put everything on
787     // FPRs, except for the condition. The condition must always be on a GPR.
788     LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
789     if (SrcTy.isVector()) {
790       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
791       break;
792     }
793 
794     // Try to minimize the number of copies. If we have more floating point
795     // constrained values than not, then we'll put everything on FPR. Otherwise,
796     // everything has to be on GPR.
797     unsigned NumFP = 0;
798 
799     // Check if the uses of the result always produce floating point values.
800     //
801     // For example:
802     //
803     // %z = G_SELECT %cond %x %y
804     // fpr = G_FOO %z ...
805     if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
806                [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
807       ++NumFP;
808 
809     // Check if the defs of the source values always produce floating point
810     // values.
811     //
812     // For example:
813     //
814     // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
815     // %z = G_SELECT %cond %x %y
816     //
817     // Also check whether or not the sources have already been decided to be
818     // FPR. Keep track of this.
819     //
820     // This doesn't check the condition, since it's just whatever is in NZCV.
821     // This isn't passed explicitly in a register to fcsel/csel.
822     for (unsigned Idx = 2; Idx < 4; ++Idx) {
823       Register VReg = MI.getOperand(Idx).getReg();
824       MachineInstr *DefMI = MRI.getVRegDef(VReg);
825       if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
826           onlyDefinesFP(*DefMI, MRI, TRI))
827         ++NumFP;
828     }
829 
830     // If we have more FP constraints than not, then move everything over to
831     // FPR.
832     if (NumFP >= 2)
833       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
834 
835     break;
836   }
837   case TargetOpcode::G_UNMERGE_VALUES: {
838     // If the first operand belongs to a FPR register bank, then make sure that
839     // we preserve that.
840     if (OpRegBankIdx[0] != PMI_FirstGPR)
841       break;
842 
843     LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
844     // UNMERGE into scalars from a vector should always use FPR.
845     // Likewise if any of the uses are FP instructions.
846     if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
847         any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
848                [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
849       // Set the register bank of every operand to FPR.
850       for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
851            Idx < NumOperands; ++Idx)
852         OpRegBankIdx[Idx] = PMI_FirstFPR;
853     }
854     break;
855   }
856   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
857     // Destination and source need to be FPRs.
858     OpRegBankIdx[0] = PMI_FirstFPR;
859     OpRegBankIdx[1] = PMI_FirstFPR;
860 
861     // Index needs to be a GPR.
862     OpRegBankIdx[2] = PMI_FirstGPR;
863     break;
864   case TargetOpcode::G_INSERT_VECTOR_ELT:
865     OpRegBankIdx[0] = PMI_FirstFPR;
866     OpRegBankIdx[1] = PMI_FirstFPR;
867 
868     // The element may be either a GPR or FPR. Preserve that behaviour.
869     if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
870       OpRegBankIdx[2] = PMI_FirstFPR;
871     else
872       OpRegBankIdx[2] = PMI_FirstGPR;
873 
874     // Index needs to be a GPR.
875     OpRegBankIdx[3] = PMI_FirstGPR;
876     break;
877   case TargetOpcode::G_EXTRACT: {
878     // For s128 sources we have to use fpr unless we know otherwise.
879     auto Src = MI.getOperand(1).getReg();
880     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
881     if (SrcTy.getSizeInBits() != 128)
882       break;
883     auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
884                    ? PMI_FirstGPR
885                    : PMI_FirstFPR;
886     OpRegBankIdx[0] = Idx;
887     OpRegBankIdx[1] = Idx;
888     break;
889   }
890   case TargetOpcode::G_BUILD_VECTOR: {
891     // If the first source operand belongs to a FPR register bank, then make
892     // sure that we preserve that.
893     if (OpRegBankIdx[1] != PMI_FirstGPR)
894       break;
895     Register VReg = MI.getOperand(1).getReg();
896     if (!VReg)
897       break;
898 
899     // Get the instruction that defined the source operand reg, and check if
900     // it's a floating point operation. Or, if it's a type like s16 which
901     // doesn't have a exact size gpr register class. The exception is if the
902     // build_vector has all constant operands, which may be better to leave as
903     // gpr without copies, so it can be matched in imported patterns.
904     MachineInstr *DefMI = MRI.getVRegDef(VReg);
905     unsigned DefOpc = DefMI->getOpcode();
906     const LLT SrcTy = MRI.getType(VReg);
907     if (all_of(MI.operands(), [&](const MachineOperand &Op) {
908           return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
909                                    TargetOpcode::G_CONSTANT;
910         }))
911       break;
912     if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
913         SrcTy.getSizeInBits() < 32 ||
914         getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
915       // Have a floating point op.
916       // Make sure every operand gets mapped to a FPR register class.
917       unsigned NumOperands = MI.getNumOperands();
918       for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
919         OpRegBankIdx[Idx] = PMI_FirstFPR;
920     }
921     break;
922   }
923   case TargetOpcode::G_VECREDUCE_FADD:
924   case TargetOpcode::G_VECREDUCE_FMUL:
925   case TargetOpcode::G_VECREDUCE_FMAX:
926   case TargetOpcode::G_VECREDUCE_FMIN:
927   case TargetOpcode::G_VECREDUCE_ADD:
928   case TargetOpcode::G_VECREDUCE_MUL:
929   case TargetOpcode::G_VECREDUCE_AND:
930   case TargetOpcode::G_VECREDUCE_OR:
931   case TargetOpcode::G_VECREDUCE_XOR:
932   case TargetOpcode::G_VECREDUCE_SMAX:
933   case TargetOpcode::G_VECREDUCE_SMIN:
934   case TargetOpcode::G_VECREDUCE_UMAX:
935   case TargetOpcode::G_VECREDUCE_UMIN:
936     // Reductions produce a scalar value from a vector, the scalar should be on
937     // FPR bank.
938     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
939     break;
940   case TargetOpcode::G_VECREDUCE_SEQ_FADD:
941   case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
942     // These reductions also take a scalar accumulator input.
943     // Assign them FPR for now.
944     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
945     break;
946   case TargetOpcode::G_INTRINSIC: {
947     // Check if we know that the intrinsic has any constraints on its register
948     // banks. If it does, then update the mapping accordingly.
949     unsigned ID = MI.getIntrinsicID();
950     unsigned Idx = 0;
951     if (!isFPIntrinsic(ID))
952       break;
953     for (const auto &Op : MI.explicit_operands()) {
954       if (Op.isReg())
955         OpRegBankIdx[Idx] = PMI_FirstFPR;
956       ++Idx;
957     }
958     break;
959   }
960   }
961 
962   // Finally construct the computed mapping.
963   SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
964   for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
965     if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
966       auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
967       if (!Mapping->isValid())
968         return getInvalidInstructionMapping();
969 
970       OpdsMapping[Idx] = Mapping;
971     }
972   }
973 
974   return getInstructionMapping(DefaultMappingID, Cost,
975                                getOperandsMapping(OpdsMapping), NumOperands);
976 }
977