1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64RegisterBankInfo.h"
15 #include "AArch64RegisterInfo.h"
16 #include "MCTargetDesc/AArch64MCTargetDesc.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20 #include "llvm/CodeGen/GlobalISel/Utils.h"
21 #include "llvm/CodeGen/LowLevelType.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineOperand.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterBank.h"
27 #include "llvm/CodeGen/RegisterBankInfo.h"
28 #include "llvm/CodeGen/TargetOpcodes.h"
29 #include "llvm/CodeGen/TargetRegisterInfo.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/IR/IntrinsicsAArch64.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include <algorithm>
34 #include <cassert>
35 
36 #define GET_TARGET_REGBANK_IMPL
37 #include "AArch64GenRegisterBank.inc"
38 
39 // This file will be TableGen'ed at some point.
40 #include "AArch64GenRegisterBankInfo.def"
41 
42 using namespace llvm;
43 
/// Construct the AArch64 RegisterBankInfo.
/// The constructor body performs no initialization of its own: it runs, once
/// per process, a battery of assertions checking that the static TableGen'ed
/// tables (register banks, partial mappings, value mappings) are laid out the
/// way the rest of this file assumes.
AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  // NOTE(review): this lambda is static and captures TRI by reference; it is
  // only ever executed once (via call_once below), so only the TRI of the
  // first construction is used for verification.
  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the floating-point and
    // vector registers (the QQ tuple class and its sub-/super-classes, as
    // checked below).
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.getSize() == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");

    // Check that the TableGen'ed like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operands instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

// Check the value mappings returned by getCopyMapping for same-size
// bank-to-bank copies: operand 0 is the destination, operand 1 the source.
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);  \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

// Check the value mappings returned by getFPExtMapping for a widening FP
// conversion: operand 0 is the (larger) destination, operand 1 the source.
#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  // Run the verification above exactly once, even if several subtargets
  // construct a RegisterBankInfo.
  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
214 
215 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
216                                            const RegisterBank &B,
217                                            unsigned Size) const {
218   // What do we do with different size?
219   // copy are same size.
220   // Will introduce other hooks for different size:
221   // * extract cost.
222   // * build_sequence cost.
223 
224   // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
225   // FIXME: This should be deduced from the scheduling model.
226   if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
227     // FMOVXDr or FMOVWSr.
228     return 5;
229   if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
230     // FMOVDXr or FMOVSWr.
231     return 4;
232 
233   return RegisterBankInfo::copyCost(A, B, Size);
234 }
235 
/// Map a concrete register class to the register bank that covers it:
/// scalar-FP / vector classes (including the D/Q tuple classes) map to FPR,
/// integer and address classes map to GPR, and the condition-code class maps
/// to CC. Unknown classes are a hard error.
const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  // Floating-point and SIMD classes, including multi-register tuples.
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  // General-purpose (integer/pointer) classes, including SP-capable,
  // sequential-pair, and matrix-index variants.
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  // Condition flags.
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}
286 
287 RegisterBankInfo::InstructionMappings
288 AArch64RegisterBankInfo::getInstrAlternativeMappings(
289     const MachineInstr &MI) const {
290   const MachineFunction &MF = *MI.getParent()->getParent();
291   const TargetSubtargetInfo &STI = MF.getSubtarget();
292   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
293   const MachineRegisterInfo &MRI = MF.getRegInfo();
294 
295   switch (MI.getOpcode()) {
296   case TargetOpcode::G_OR: {
297     // 32 and 64-bit or can be mapped on either FPR or
298     // GPR for the same cost.
299     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
300     if (Size != 32 && Size != 64)
301       break;
302 
303     // If the instruction has any implicit-defs or uses,
304     // do not mess with it.
305     if (MI.getNumOperands() != 3)
306       break;
307     InstructionMappings AltMappings;
308     const InstructionMapping &GPRMapping = getInstructionMapping(
309         /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
310         /*NumOperands*/ 3);
311     const InstructionMapping &FPRMapping = getInstructionMapping(
312         /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
313         /*NumOperands*/ 3);
314 
315     AltMappings.push_back(&GPRMapping);
316     AltMappings.push_back(&FPRMapping);
317     return AltMappings;
318   }
319   case TargetOpcode::G_BITCAST: {
320     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
321     if (Size != 32 && Size != 64)
322       break;
323 
324     // If the instruction has any implicit-defs or uses,
325     // do not mess with it.
326     if (MI.getNumOperands() != 2)
327       break;
328 
329     InstructionMappings AltMappings;
330     const InstructionMapping &GPRMapping = getInstructionMapping(
331         /*ID*/ 1, /*Cost*/ 1,
332         getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
333         /*NumOperands*/ 2);
334     const InstructionMapping &FPRMapping = getInstructionMapping(
335         /*ID*/ 2, /*Cost*/ 1,
336         getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
337         /*NumOperands*/ 2);
338     const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
339         /*ID*/ 3,
340         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
341         getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
342         /*NumOperands*/ 2);
343     const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
344         /*ID*/ 3,
345         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
346         getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
347         /*NumOperands*/ 2);
348 
349     AltMappings.push_back(&GPRMapping);
350     AltMappings.push_back(&FPRMapping);
351     AltMappings.push_back(&GPRToFPRMapping);
352     AltMappings.push_back(&FPRToGPRMapping);
353     return AltMappings;
354   }
355   case TargetOpcode::G_LOAD: {
356     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
357     if (Size != 64)
358       break;
359 
360     // If the instruction has any implicit-defs or uses,
361     // do not mess with it.
362     if (MI.getNumOperands() != 2)
363       break;
364 
365     InstructionMappings AltMappings;
366     const InstructionMapping &GPRMapping = getInstructionMapping(
367         /*ID*/ 1, /*Cost*/ 1,
368         getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
369                             // Addresses are GPR 64-bit.
370                             getValueMapping(PMI_FirstGPR, 64)}),
371         /*NumOperands*/ 2);
372     const InstructionMapping &FPRMapping = getInstructionMapping(
373         /*ID*/ 2, /*Cost*/ 1,
374         getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
375                             // Addresses are GPR 64-bit.
376                             getValueMapping(PMI_FirstGPR, 64)}),
377         /*NumOperands*/ 2);
378 
379     AltMappings.push_back(&GPRMapping);
380     AltMappings.push_back(&FPRMapping);
381     return AltMappings;
382   }
383   default:
384     break;
385   }
386   return RegisterBankInfo::getInstrAlternativeMappings(MI);
387 }
388 
389 void AArch64RegisterBankInfo::applyMappingImpl(
390     const OperandsMapper &OpdMapper) const {
391   switch (OpdMapper.getMI().getOpcode()) {
392   case TargetOpcode::G_OR:
393   case TargetOpcode::G_BITCAST:
394   case TargetOpcode::G_LOAD:
395     // Those ID must match getInstrAlternativeMappings.
396     assert((OpdMapper.getInstrMapping().getID() >= 1 &&
397             OpdMapper.getInstrMapping().getID() <= 4) &&
398            "Don't know how to handle that ID");
399     return applyDefaultMapping(OpdMapper);
400   default:
401     llvm_unreachable("Don't know how to handle that operation");
402   }
403 }
404 
405 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
406 /// having only floating-point operands.
407 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
408   switch (Opc) {
409   case TargetOpcode::G_FADD:
410   case TargetOpcode::G_FSUB:
411   case TargetOpcode::G_FMUL:
412   case TargetOpcode::G_FMA:
413   case TargetOpcode::G_FDIV:
414   case TargetOpcode::G_FCONSTANT:
415   case TargetOpcode::G_FPEXT:
416   case TargetOpcode::G_FPTRUNC:
417   case TargetOpcode::G_FCEIL:
418   case TargetOpcode::G_FFLOOR:
419   case TargetOpcode::G_FNEARBYINT:
420   case TargetOpcode::G_FNEG:
421   case TargetOpcode::G_FCOS:
422   case TargetOpcode::G_FSIN:
423   case TargetOpcode::G_FLOG10:
424   case TargetOpcode::G_FLOG:
425   case TargetOpcode::G_FLOG2:
426   case TargetOpcode::G_FSQRT:
427   case TargetOpcode::G_FABS:
428   case TargetOpcode::G_FEXP:
429   case TargetOpcode::G_FRINT:
430   case TargetOpcode::G_INTRINSIC_TRUNC:
431   case TargetOpcode::G_INTRINSIC_ROUND:
432   case TargetOpcode::G_FMAXNUM:
433   case TargetOpcode::G_FMINNUM:
434   case TargetOpcode::G_FMAXIMUM:
435   case TargetOpcode::G_FMINIMUM:
436     return true;
437   }
438   return false;
439 }
440 
/// Build a mapping where every register operand of \p MI lives on the same
/// register bank. The bank is chosen from the type of operand 0 and the
/// opcode: vector types and FP opcodes go to FPR, everything else to GPR.
/// Only handles instructions with at most 3 operands.
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or less operands");

  // The bank and size are derived from the destination (operand 0).
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of details, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each types.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  // All operands share the same partial-mapping index and size.
  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}
483 
484 /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
485 static bool isFPIntrinsic(unsigned ID) {
486   // TODO: Add more intrinsics.
487   switch (ID) {
488   default:
489     return false;
490   case Intrinsic::aarch64_neon_uaddlv:
491     return true;
492   }
493 }
494 
/// \returns true if \p MI is known to produce a floating-point value: it is
/// an FP opcode or FP intrinsic, its destination already has the FPR bank
/// assigned, or — for PHIs, up to \p Depth levels of recursion — at least one
/// of its inputs is defined by an FP-producing instruction.
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  // FP intrinsics only use and define FPRs.
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned a FPR
  // based off of its inputs.
  // The Depth check bounds the recursion through onlyDefinesFP below.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  // A single FP-defining input is enough to prefer FPR for the phi.
  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}
532 
533 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
534                                          const MachineRegisterInfo &MRI,
535                                          const TargetRegisterInfo &TRI,
536                                          unsigned Depth) const {
537   switch (MI.getOpcode()) {
538   case TargetOpcode::G_FPTOSI:
539   case TargetOpcode::G_FPTOUI:
540   case TargetOpcode::G_FCMP:
541   case TargetOpcode::G_LROUND:
542   case TargetOpcode::G_LLROUND:
543     return true;
544   default:
545     break;
546   }
547   return hasFPConstraints(MI, MRI, TRI, Depth);
548 }
549 
550 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
551                                             const MachineRegisterInfo &MRI,
552                                             const TargetRegisterInfo &TRI,
553                                             unsigned Depth) const {
554   switch (MI.getOpcode()) {
555   case AArch64::G_DUP:
556   case TargetOpcode::G_SITOFP:
557   case TargetOpcode::G_UITOFP:
558   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
559   case TargetOpcode::G_INSERT_VECTOR_ELT:
560   case TargetOpcode::G_BUILD_VECTOR:
561   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
562     return true;
563   default:
564     break;
565   }
566   return hasFPConstraints(MI, MRI, TRI, Depth);
567 }
568 
569 const RegisterBankInfo::InstructionMapping &
570 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
571   const unsigned Opc = MI.getOpcode();
572 
573   // Try the default logic for non-generic instructions that are either copies
574   // or already have some operands assigned to banks.
575   if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
576       Opc == TargetOpcode::G_PHI) {
577     const RegisterBankInfo::InstructionMapping &Mapping =
578         getInstrMappingImpl(MI);
579     if (Mapping.isValid())
580       return Mapping;
581   }
582 
583   const MachineFunction &MF = *MI.getParent()->getParent();
584   const MachineRegisterInfo &MRI = MF.getRegInfo();
585   const TargetSubtargetInfo &STI = MF.getSubtarget();
586   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
587 
588   switch (Opc) {
589     // G_{F|S|U}REM are not listed because they are not legal.
590     // Arithmetic ops.
591   case TargetOpcode::G_ADD:
592   case TargetOpcode::G_SUB:
593   case TargetOpcode::G_PTR_ADD:
594   case TargetOpcode::G_MUL:
595   case TargetOpcode::G_SDIV:
596   case TargetOpcode::G_UDIV:
597     // Bitwise ops.
598   case TargetOpcode::G_AND:
599   case TargetOpcode::G_OR:
600   case TargetOpcode::G_XOR:
601     // Floating point ops.
602   case TargetOpcode::G_FADD:
603   case TargetOpcode::G_FSUB:
604   case TargetOpcode::G_FMUL:
605   case TargetOpcode::G_FDIV:
606   case TargetOpcode::G_FMAXIMUM:
607   case TargetOpcode::G_FMINIMUM:
608     return getSameKindOfOperandsMapping(MI);
609   case TargetOpcode::G_FPEXT: {
610     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
611     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
612     return getInstructionMapping(
613         DefaultMappingID, /*Cost*/ 1,
614         getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
615         /*NumOperands*/ 2);
616   }
617     // Shifts.
618   case TargetOpcode::G_SHL:
619   case TargetOpcode::G_LSHR:
620   case TargetOpcode::G_ASHR: {
621     LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
622     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
623     if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
624       return getInstructionMapping(DefaultMappingID, 1,
625                                    &ValMappings[Shift64Imm], 3);
626     return getSameKindOfOperandsMapping(MI);
627   }
628   case TargetOpcode::COPY: {
629     Register DstReg = MI.getOperand(0).getReg();
630     Register SrcReg = MI.getOperand(1).getReg();
631     // Check if one of the register is not a generic register.
632     if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
633         (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
634       const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
635       const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
636       if (!DstRB)
637         DstRB = SrcRB;
638       else if (!SrcRB)
639         SrcRB = DstRB;
640       // If both RB are null that means both registers are generic.
641       // We shouldn't be here.
642       assert(DstRB && SrcRB && "Both RegBank were nullptr");
643       unsigned Size = getSizeInBits(DstReg, MRI, TRI);
644       return getInstructionMapping(
645           DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
646           getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
647           // We only care about the mapping of the destination.
648           /*NumOperands*/ 1);
649     }
650     // Both registers are generic, use G_BITCAST.
651     [[fallthrough]];
652   }
653   case TargetOpcode::G_BITCAST: {
654     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
655     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
656     unsigned Size = DstTy.getSizeInBits();
657     bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
658     bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
659     const RegisterBank &DstRB =
660         DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
661     const RegisterBank &SrcRB =
662         SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
663     return getInstructionMapping(
664         DefaultMappingID, copyCost(DstRB, SrcRB, Size),
665         getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
666         // We only care about the mapping of the destination for COPY.
667         /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
668   }
669   default:
670     break;
671   }
672 
673   unsigned NumOperands = MI.getNumOperands();
674 
675   // Track the size and bank of each register.  We don't do partial mappings.
676   SmallVector<unsigned, 4> OpSize(NumOperands);
677   SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
678   for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
679     auto &MO = MI.getOperand(Idx);
    // Skip non-register operands and unallocated (null) registers; they get
    // no value mapping.
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    // Remember the operand size so the final mapping lookup below can select
    // the partial mapping of the right width.
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    // Scalars wider than 64 bits cannot live in a single GPR, so they are
    // also sent to FPR.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    // G_DUP splats a scalar into a vector: the result (operand 0) is always
    // FPR; the scalar source may stay on GPR or move to FPR depending on
    // where it is already constrained to live.
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // s8 is an exception for G_DUP, which we always want on gpr.
    if (ScalarTy.getSizeInBits() != 8 &&
        (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
         onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    // A scalar s128 source was already defaulted to FPR (size > 64 above);
    // keep the narrower destination on FPR too, so the truncation does not
    // force a cross-bank copy.
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Vector int->FP conversions keep the all-FPR default mapping.
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    // Vector FP->int conversions keep the all-FPR default mapping; scalar
    // ones read an FPR source and produce a GPR integer result.
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use a FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    // Operand 1 is the predicate (an immediate, no register bank); the two
    // compared values are always FP, hence FPR.
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading in vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blind map every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct use in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      // If the stored value is produced by an FP-only instruction, store it
      // straight from FPR to avoid a cross-bank copy.
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR. Otherwise,
    // everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    // NumFP counts at most 3 votes (result uses + the two select inputs), so
    // >= 2 is a majority.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // The source is always the last operand of G_UNMERGE_VALUES.
    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    // Destination vector and source vector must be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    // NOTE(review): XSeqPairs appears to be a GPR-pair class (presumably from
    // 128-bit atomic ops such as CASP) — if the source is already constrained
    // to it, keep the extract on GPR; otherwise default s128 to FPR.
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have a exact size gpr register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector, the scalar should be on
    // FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned ID = MI.getIntrinsicID();
    unsigned Idx = 0;
    if (!isFPIntrinsic(ID))
      break;
    // FP intrinsics: put every register operand on FPR. Idx is advanced for
    // non-register operands too, so it stays aligned with the operand index.
    for (const auto &Op : MI.explicit_operands()) {
      if (Op.isReg())
        OpRegBankIdx[Idx] = PMI_FirstFPR;
      ++Idx;
    }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  // Non-register operands (and operands with an invalid type) keep a null
  // entry in OpdsMapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      // Bail out entirely if any operand has no valid mapping for its
      // bank/size combination.
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}
1008