//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");
    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // QQQQ + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operand instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);   \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           TypeSize Size) const {
  // What do we do with different sizes?
  // Copies are assumed to be of the same size.
  // We will introduce other hooks for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // Copies between GPR and FPR (in either direction) involve FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::FPR128_0to7RegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit G_OR can be mapped to either FPR or
    // GPR for the same cost.
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  switch (OpdMapper.getMI().getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return true;
  }
  return false;
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // This should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned an FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

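  // For example (illustrative generic MIR), a PHI fed by a G_FADD result is
  // likely to be assigned FPR:
  //
  // %x:_(s32) = G_FADD %a, %b
  // %phi:_(s32) = G_PHI %x(s32), %bb.1, %y(s32), %bb.2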
  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
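    // (E.g. for a value of type { { double, i32 } } we descend twice and end
    // up with EltTy == double.)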
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
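    // A 32-bit value shifted by a 64-bit amount keeps the shift amount on a
    // 64-bit GPR; use the dedicated Shift64Imm value mapping for that case.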
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, both registers are generic and
      // we shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBanks were nullptr");
      unsigned Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, TypeSize::getFixed(Size)),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register.  We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
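    // For example (illustrative MIR):
    //
    // %val:_(s32) = G_LOAD %ptr(p0) :: (load (s32))
    // %vec:_(<4 x s32>) = G_DUP %val(s32)
    //
    // Keeping %val on FPR lets the load and the dup be selected together as a
    // single LD1R.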
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on GPR.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer-to-FP conversions don't necessarily happen between the GPR and
    // FPR register banks; they can also be done entirely within FPR.
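    // (The SIMD&FP scalar forms of SCVTF/UCVTF read and write FPRs directly,
    // so no GPR->FPR copy is needed when the source is already on FPR.)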
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross-register-bank copy, and that is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading into the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross-bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
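    // For example (illustrative MIR):
    //
    // %v:_(s32) = G_LOAD %p(p0) :: (load (s32))
    // %r:_(s32) = G_FADD %v, %w
    //
    // Here %v directly feeds a FP instruction, so we map the load result to
    // FPR.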
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct use in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }
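
    // NumFP can be at most 3 at this point: one for the uses of the result,
    // plus one for each of the two source values.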
    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to an FPR register bank, then make sure
    // that we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
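    //
    // For example:
    //
    // %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %v:_(<2 x s32>)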
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use FPR unless we know otherwise.
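    // (s128 values in the XSeqPairs class typically come from 128-bit atomic
    // operations, which use pairs of X registers.)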
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to an FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation, or if it has a type like s16 which
    // doesn't have an exact-size GPR register class. The exception is when the
    // build_vector has all constant operands, which may be better left on GPR
    // without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to an FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
1057   }
1058   case TargetOpcode::G_VECREDUCE_FADD:
1059   case TargetOpcode::G_VECREDUCE_FMUL:
1060   case TargetOpcode::G_VECREDUCE_FMAX:
1061   case TargetOpcode::G_VECREDUCE_FMIN:
1062   case TargetOpcode::G_VECREDUCE_FMAXIMUM:
1063   case TargetOpcode::G_VECREDUCE_FMINIMUM:
1064   case TargetOpcode::G_VECREDUCE_ADD:
1065   case TargetOpcode::G_VECREDUCE_MUL:
1066   case TargetOpcode::G_VECREDUCE_AND:
1067   case TargetOpcode::G_VECREDUCE_OR:
1068   case TargetOpcode::G_VECREDUCE_XOR:
1069   case TargetOpcode::G_VECREDUCE_SMAX:
1070   case TargetOpcode::G_VECREDUCE_SMIN:
1071   case TargetOpcode::G_VECREDUCE_UMAX:
1072   case TargetOpcode::G_VECREDUCE_UMIN:
1073     // Reductions produce a scalar value from a vector, the scalar should be on
1074     // FPR bank.
1075     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1076     break;
1077   case TargetOpcode::G_VECREDUCE_SEQ_FADD:
1078   case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
1079     // These reductions also take a scalar accumulator input.
1080     // Assign them FPR for now.
1081     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
1082     break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned Idx = 0;
    if (onlyDefinesFP(MI, MRI, TRI))
      for (const auto &Op : MI.defs()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    else
      Idx += MI.getNumExplicitDefs();

    if (onlyUsesFP(MI, MRI, TRI))
      for (const auto &Op : MI.explicit_uses()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}