1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64RegisterBankInfo.h"
15 #include "AArch64InstrInfo.h"
16 #include "AArch64RegisterInfo.h"
17 #include "MCTargetDesc/AArch64MCTargetDesc.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
21 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
22 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
23 #include "llvm/CodeGen/GlobalISel/Utils.h"
24 #include "llvm/CodeGen/LowLevelType.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/TargetOpcodes.h"
30 #include "llvm/CodeGen/TargetRegisterInfo.h"
31 #include "llvm/CodeGen/TargetSubtargetInfo.h"
32 #include "llvm/IR/IntrinsicsAArch64.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include <algorithm>
35 #include <cassert>
36 
37 #define GET_TARGET_REGBANK_IMPL
38 #include "AArch64GenRegisterBank.inc"
39 
40 // This file will be TableGen'ed at some point.
41 #include "AArch64GenRegisterBankInfo.def"
42 
43 using namespace llvm;
44 
/// Construct the AArch64 register-bank information.
///
/// All the real data (banks, partial mappings, value mappings) lives in
/// static tables produced from AArch64GenRegisterBank.inc and
/// AArch64GenRegisterBankInfo.def (both included above); this constructor
/// only runs a one-time battery of assertions validating that those tables
/// match the expectations hard-coded here.
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
    : AArch64GenRegisterBankInfo() {
  static llvm::once_flag InitializeRegisterBankFlag;

  // NOTE(review): this lambda captures `this` and `TRI` by reference yet is
  // stored in a function-local static. That is only safe because call_once
  // below invokes it during the very first constructor run, while those
  // captures are still alive, and never again afterwards — confirm this
  // remains true if construction order ever changes.
  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    // Check that the generated bank table is ordered GPR, FPR, CC.
    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.getSize() == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");

    // Check that the TableGen'ed like file is in sync we our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operands instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

// Check the two-operand value mappings used for cross-register-bank copies:
// operand 0 must use the destination bank's partial mapping and operand 1
// the source bank's.
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);  \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

// Check the value mappings used for G_FPEXT: operand 0 is the (wider) FPR
// destination, operand 1 the (narrower) FPR source.
#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
215 
216 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
217                                            const RegisterBank &B,
218                                            unsigned Size) const {
219   // What do we do with different size?
220   // copy are same size.
221   // Will introduce other hooks for different size:
222   // * extract cost.
223   // * build_sequence cost.
224 
225   // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
226   // FIXME: This should be deduced from the scheduling model.
227   if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
228     // FMOVXDr or FMOVWSr.
229     return 5;
230   if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
231     // FMOVDXr or FMOVSWr.
232     return 4;
233 
234   return RegisterBankInfo::copyCost(A, B, Size);
235 }
236 
/// Map a target register class to the (unique) register bank it belongs to.
///
/// Every class listed here resolves to exactly one of the three AArch64
/// banks: FPR (scalar FP, vector, and vector-tuple classes), GPR (integer
/// classes, including SP-containing and tail-call variants), or CC (the
/// condition flags). A class not listed is a hard error — keep this switch
/// in sync with the generated register-class list.
const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  // Scalar FP (FPR8..FPR128), _lo variants, and D/Q tuple classes.
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  // 32/64-bit integer classes, including sp/sponly/arg/all variants,
  // no-IP and tail-call classes, sequential pairs, and matrix-index GPRs.
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  // Condition flags.
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}
285 
286 RegisterBankInfo::InstructionMappings
287 AArch64RegisterBankInfo::getInstrAlternativeMappings(
288     const MachineInstr &MI) const {
289   const MachineFunction &MF = *MI.getParent()->getParent();
290   const TargetSubtargetInfo &STI = MF.getSubtarget();
291   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
292   const MachineRegisterInfo &MRI = MF.getRegInfo();
293 
294   switch (MI.getOpcode()) {
295   case TargetOpcode::G_OR: {
296     // 32 and 64-bit or can be mapped on either FPR or
297     // GPR for the same cost.
298     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
299     if (Size != 32 && Size != 64)
300       break;
301 
302     // If the instruction has any implicit-defs or uses,
303     // do not mess with it.
304     if (MI.getNumOperands() != 3)
305       break;
306     InstructionMappings AltMappings;
307     const InstructionMapping &GPRMapping = getInstructionMapping(
308         /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
309         /*NumOperands*/ 3);
310     const InstructionMapping &FPRMapping = getInstructionMapping(
311         /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
312         /*NumOperands*/ 3);
313 
314     AltMappings.push_back(&GPRMapping);
315     AltMappings.push_back(&FPRMapping);
316     return AltMappings;
317   }
318   case TargetOpcode::G_BITCAST: {
319     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
320     if (Size != 32 && Size != 64)
321       break;
322 
323     // If the instruction has any implicit-defs or uses,
324     // do not mess with it.
325     if (MI.getNumOperands() != 2)
326       break;
327 
328     InstructionMappings AltMappings;
329     const InstructionMapping &GPRMapping = getInstructionMapping(
330         /*ID*/ 1, /*Cost*/ 1,
331         getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
332         /*NumOperands*/ 2);
333     const InstructionMapping &FPRMapping = getInstructionMapping(
334         /*ID*/ 2, /*Cost*/ 1,
335         getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
336         /*NumOperands*/ 2);
337     const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
338         /*ID*/ 3,
339         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
340         getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
341         /*NumOperands*/ 2);
342     const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
343         /*ID*/ 3,
344         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
345         getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
346         /*NumOperands*/ 2);
347 
348     AltMappings.push_back(&GPRMapping);
349     AltMappings.push_back(&FPRMapping);
350     AltMappings.push_back(&GPRToFPRMapping);
351     AltMappings.push_back(&FPRToGPRMapping);
352     return AltMappings;
353   }
354   case TargetOpcode::G_LOAD: {
355     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
356     if (Size != 64)
357       break;
358 
359     // If the instruction has any implicit-defs or uses,
360     // do not mess with it.
361     if (MI.getNumOperands() != 2)
362       break;
363 
364     InstructionMappings AltMappings;
365     const InstructionMapping &GPRMapping = getInstructionMapping(
366         /*ID*/ 1, /*Cost*/ 1,
367         getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
368                             // Addresses are GPR 64-bit.
369                             getValueMapping(PMI_FirstGPR, 64)}),
370         /*NumOperands*/ 2);
371     const InstructionMapping &FPRMapping = getInstructionMapping(
372         /*ID*/ 2, /*Cost*/ 1,
373         getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
374                             // Addresses are GPR 64-bit.
375                             getValueMapping(PMI_FirstGPR, 64)}),
376         /*NumOperands*/ 2);
377 
378     AltMappings.push_back(&GPRMapping);
379     AltMappings.push_back(&FPRMapping);
380     return AltMappings;
381   }
382   default:
383     break;
384   }
385   return RegisterBankInfo::getInstrAlternativeMappings(MI);
386 }
387 
388 void AArch64RegisterBankInfo::applyMappingImpl(
389     const OperandsMapper &OpdMapper) const {
390   switch (OpdMapper.getMI().getOpcode()) {
391   case TargetOpcode::G_OR:
392   case TargetOpcode::G_BITCAST:
393   case TargetOpcode::G_LOAD:
394     // Those ID must match getInstrAlternativeMappings.
395     assert((OpdMapper.getInstrMapping().getID() >= 1 &&
396             OpdMapper.getInstrMapping().getID() <= 4) &&
397            "Don't know how to handle that ID");
398     return applyDefaultMapping(OpdMapper);
399   default:
400     llvm_unreachable("Don't know how to handle that operation");
401   }
402 }
403 
404 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
405 /// having only floating-point operands.
406 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
407   switch (Opc) {
408   case TargetOpcode::G_FADD:
409   case TargetOpcode::G_FSUB:
410   case TargetOpcode::G_FMUL:
411   case TargetOpcode::G_FMA:
412   case TargetOpcode::G_FDIV:
413   case TargetOpcode::G_FCONSTANT:
414   case TargetOpcode::G_FPEXT:
415   case TargetOpcode::G_FPTRUNC:
416   case TargetOpcode::G_FCEIL:
417   case TargetOpcode::G_FFLOOR:
418   case TargetOpcode::G_FNEARBYINT:
419   case TargetOpcode::G_FNEG:
420   case TargetOpcode::G_FCOS:
421   case TargetOpcode::G_FSIN:
422   case TargetOpcode::G_FLOG10:
423   case TargetOpcode::G_FLOG:
424   case TargetOpcode::G_FLOG2:
425   case TargetOpcode::G_FSQRT:
426   case TargetOpcode::G_FABS:
427   case TargetOpcode::G_FEXP:
428   case TargetOpcode::G_FRINT:
429   case TargetOpcode::G_INTRINSIC_TRUNC:
430   case TargetOpcode::G_INTRINSIC_ROUND:
431   case TargetOpcode::G_FMAXNUM:
432   case TargetOpcode::G_FMINNUM:
433   case TargetOpcode::G_FMAXIMUM:
434   case TargetOpcode::G_FMINIMUM:
435     return true;
436   }
437   return false;
438 }
439 
/// Build the mapping for an instruction whose operands all live on the same
/// register bank and share a single value mapping (e.g. binary arithmetic
/// and bitwise ops).
///
/// The bank is chosen from the destination: vector types and FP opcodes go
/// to FPR, everything else to GPR. In asserts builds, every other operand is
/// checked to be compatible (same size bucket, same bank kind) with the
/// destination.
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or less operands");

  // Derive bank and size from the destination (operand 0).
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of details, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each types.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  // One shared value mapping for all operands, on the chosen bank.
  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}
482 
483 /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
484 static bool isFPIntrinsic(unsigned ID) {
485   // TODO: Add more intrinsics.
486   switch (ID) {
487   default:
488     return false;
489   case Intrinsic::aarch64_neon_uaddlv:
490     return true;
491   }
492 }
493 
494 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
495                                                const MachineRegisterInfo &MRI,
496                                                const TargetRegisterInfo &TRI,
497                                                unsigned Depth) const {
498   unsigned Op = MI.getOpcode();
499   if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
500     return true;
501 
502   // Do we have an explicit floating point instruction?
503   if (isPreISelGenericFloatingPointOpcode(Op))
504     return true;
505 
506   // No. Check if we have a copy-like instruction. If we do, then we could
507   // still be fed by floating point instructions.
508   if (Op != TargetOpcode::COPY && !MI.isPHI() &&
509       !isPreISelGenericOptimizationHint(Op))
510     return false;
511 
512   // Check if we already know the register bank.
513   auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
514   if (RB == &AArch64::FPRRegBank)
515     return true;
516   if (RB == &AArch64::GPRRegBank)
517     return false;
518 
519   // We don't know anything.
520   //
521   // If we have a phi, we may be able to infer that it will be assigned a FPR
522   // based off of its inputs.
523   if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
524     return false;
525 
526   return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
527     return Op.isReg() &&
528            onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
529   });
530 }
531 
532 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
533                                          const MachineRegisterInfo &MRI,
534                                          const TargetRegisterInfo &TRI,
535                                          unsigned Depth) const {
536   switch (MI.getOpcode()) {
537   case TargetOpcode::G_FPTOSI:
538   case TargetOpcode::G_FPTOUI:
539   case TargetOpcode::G_FCMP:
540   case TargetOpcode::G_LROUND:
541   case TargetOpcode::G_LLROUND:
542     return true;
543   default:
544     break;
545   }
546   return hasFPConstraints(MI, MRI, TRI, Depth);
547 }
548 
549 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
550                                             const MachineRegisterInfo &MRI,
551                                             const TargetRegisterInfo &TRI,
552                                             unsigned Depth) const {
553   switch (MI.getOpcode()) {
554   case AArch64::G_DUP:
555   case TargetOpcode::G_SITOFP:
556   case TargetOpcode::G_UITOFP:
557   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
558   case TargetOpcode::G_INSERT_VECTOR_ELT:
559   case TargetOpcode::G_BUILD_VECTOR:
560   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
561     return true;
562   default:
563     break;
564   }
565   return hasFPConstraints(MI, MRI, TRI, Depth);
566 }
567 
568 const RegisterBankInfo::InstructionMapping &
569 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
570   const unsigned Opc = MI.getOpcode();
571 
572   // Try the default logic for non-generic instructions that are either copies
573   // or already have some operands assigned to banks.
574   if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
575       Opc == TargetOpcode::G_PHI) {
576     const RegisterBankInfo::InstructionMapping &Mapping =
577         getInstrMappingImpl(MI);
578     if (Mapping.isValid())
579       return Mapping;
580   }
581 
582   const MachineFunction &MF = *MI.getParent()->getParent();
583   const MachineRegisterInfo &MRI = MF.getRegInfo();
584   const TargetSubtargetInfo &STI = MF.getSubtarget();
585   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
586 
587   switch (Opc) {
588     // G_{F|S|U}REM are not listed because they are not legal.
589     // Arithmetic ops.
590   case TargetOpcode::G_ADD:
591   case TargetOpcode::G_SUB:
592   case TargetOpcode::G_PTR_ADD:
593   case TargetOpcode::G_MUL:
594   case TargetOpcode::G_SDIV:
595   case TargetOpcode::G_UDIV:
596     // Bitwise ops.
597   case TargetOpcode::G_AND:
598   case TargetOpcode::G_OR:
599   case TargetOpcode::G_XOR:
600     // Floating point ops.
601   case TargetOpcode::G_FADD:
602   case TargetOpcode::G_FSUB:
603   case TargetOpcode::G_FMUL:
604   case TargetOpcode::G_FDIV:
605   case TargetOpcode::G_FMAXIMUM:
606   case TargetOpcode::G_FMINIMUM:
607     return getSameKindOfOperandsMapping(MI);
608   case TargetOpcode::G_FPEXT: {
609     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
610     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
611     return getInstructionMapping(
612         DefaultMappingID, /*Cost*/ 1,
613         getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
614         /*NumOperands*/ 2);
615   }
616     // Shifts.
617   case TargetOpcode::G_SHL:
618   case TargetOpcode::G_LSHR:
619   case TargetOpcode::G_ASHR: {
620     LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
621     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
622     if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
623       return getInstructionMapping(DefaultMappingID, 1,
624                                    &ValMappings[Shift64Imm], 3);
625     return getSameKindOfOperandsMapping(MI);
626   }
627   case TargetOpcode::COPY: {
628     Register DstReg = MI.getOperand(0).getReg();
629     Register SrcReg = MI.getOperand(1).getReg();
630     // Check if one of the register is not a generic register.
631     if ((Register::isPhysicalRegister(DstReg) ||
632          !MRI.getType(DstReg).isValid()) ||
633         (Register::isPhysicalRegister(SrcReg) ||
634          !MRI.getType(SrcReg).isValid())) {
635       const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
636       const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
637       if (!DstRB)
638         DstRB = SrcRB;
639       else if (!SrcRB)
640         SrcRB = DstRB;
641       // If both RB are null that means both registers are generic.
642       // We shouldn't be here.
643       assert(DstRB && SrcRB && "Both RegBank were nullptr");
644       unsigned Size = getSizeInBits(DstReg, MRI, TRI);
645       return getInstructionMapping(
646           DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
647           getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
648           // We only care about the mapping of the destination.
649           /*NumOperands*/ 1);
650     }
651     // Both registers are generic, use G_BITCAST.
652     LLVM_FALLTHROUGH;
653   }
654   case TargetOpcode::G_BITCAST: {
655     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
656     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
657     unsigned Size = DstTy.getSizeInBits();
658     bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
659     bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
660     const RegisterBank &DstRB =
661         DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
662     const RegisterBank &SrcRB =
663         SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
664     return getInstructionMapping(
665         DefaultMappingID, copyCost(DstRB, SrcRB, Size),
666         getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
667         // We only care about the mapping of the destination for COPY.
668         /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
669   }
670   default:
671     break;
672   }
673 
674   unsigned NumOperands = MI.getNumOperands();
675 
676   // Track the size and bank of each register.  We don't do partial mappings.
677   SmallVector<unsigned, 4> OpSize(NumOperands);
678   SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
679   for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    // Non-register operands (immediates, etc.) and the null register keep a
    // null ValueMapping; they are skipped again when the final mapping is
    // constructed below.
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    // Scalars wider than 64 bits (e.g. s128) do not fit in a single GPR and
    // are routed to FPR as well.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    // The vector destination is always FPR; choose the bank for the scalar
    // source based on where it is already likely to live, to avoid a cross
    // bank copy.
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // s8 is an exception for G_DUP, which we always want on gpr.
    if (ScalarTy.getSizeInBits() != 8 &&
        (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
         onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    // Truncating a scalar s128 source keeps both operands on FPR, since the
    // default loop above already placed the >64-bit source there.
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Vector conversions keep the default all-FPR mapping.
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    // Scalar FP to integer conversions read FPR and write GPR; vector
    // conversions keep the default all-FPR mapping.
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use a FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    // Operand 1 is the predicate immediate and gets no bank; the two FP
    // sources stay on FPR.
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading in vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blind map every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct use in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    // Only the stored value (operand 0) is refined; the address stays on the
    // bank chosen by the default loop above.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR. Otherwise,
    // everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    // NumFP can be at most 3 (one vote from the result's uses, one from each
    // of the two source operands); a majority of 2 flips the mapping.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // The source is the last operand; all preceding operands are the defs.
    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    // Destination vector and source vector must be FPR.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    // An XSeqPairs source indicates a GPR register pair (presumably produced
    // by a 128-bit atomic such as CMP_SWAP_128 — verify against producers),
    // so keep the extract on GPR in that case.
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have a exact size gpr register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector, the scalar should be on
    // FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned ID = MI.getIntrinsicID();
    unsigned Idx = 0;
    if (!isFPIntrinsic(ID))
      break;
    // Idx is advanced for every explicit operand (including the intrinsic ID
    // immediate) so it stays aligned with the operand indices; only register
    // operands are forced to FPR.
    for (const auto &Op : MI.explicit_operands()) {
      if (Op.isReg())
        OpRegBankIdx[Idx] = PMI_FirstFPR;
      ++Idx;
    }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  // Operands that are not (non-null) registers keep a null ValueMapping entry.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      // Bail out with an invalid mapping if the bank/size pair has no
      // partial-mapping entry.
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}
1004