1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64RegisterBankInfo.h"
15 #include "AArch64RegisterInfo.h"
16 #include "MCTargetDesc/AArch64MCTargetDesc.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20 #include "llvm/CodeGen/GlobalISel/Utils.h"
21 #include "llvm/CodeGen/LowLevelTypeUtils.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineOperand.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterBank.h"
27 #include "llvm/CodeGen/RegisterBankInfo.h"
28 #include "llvm/CodeGen/TargetOpcodes.h"
29 #include "llvm/CodeGen/TargetRegisterInfo.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/IR/IntrinsicsAArch64.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/Threading.h"
34 #include <algorithm>
35 #include <cassert>
36
37 #define GET_TARGET_REGBANK_IMPL
38 #include "AArch64GenRegisterBank.inc"
39
40 // This file will be TableGen'ed at some point.
41 #include "AArch64GenRegisterBankInfo.def"
42
43 using namespace llvm;
44
/// Construct the AArch64 register bank info.
///
/// The actual register-bank tables are TableGen'ed; this constructor only
/// cross-checks them (once per process) against the PartialMappingIdx /
/// ValueMapping layout this file assumes. Every check below is an assert, so
/// in NDEBUG builds the once-callback is effectively a no-op.
AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  // NOTE(review): this lambda is `static` yet captures `TRI` and `this` by
  // reference, so it binds to the *first* constructor invocation's frame.
  // That is only safe because call_once runs the body exactly once and the
  // body is pure sanity checking -- confirm before reusing this pattern.
  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    // Verify the RegBanks table order matches the *RegBankID enumerators.
    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed like file is in sync we our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operands instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

// Verify cross-bank copy mappings: slot [0] must be the destination partial
// mapping and slot [1] the source one.
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);   \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

// Same layout checks for the dedicated G_FPEXT mapping (dst in [0], src in
// [1]).
#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
217
copyCost(const RegisterBank & A,const RegisterBank & B,TypeSize Size) const218 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
219 const RegisterBank &B,
220 TypeSize Size) const {
221 // What do we do with different size?
222 // copy are same size.
223 // Will introduce other hooks for different size:
224 // * extract cost.
225 // * build_sequence cost.
226
227 // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
228 // FIXME: This should be deduced from the scheduling model.
229 if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
230 // FMOVXDr or FMOVWSr.
231 return 5;
232 if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
233 // FMOVDXr or FMOVSWr.
234 return 4;
235
236 return RegisterBankInfo::copyCost(A, B, Size);
237 }
238
/// Map a target register class to the register bank its registers belong to.
///
/// Every register class GlobalISel may query must be listed explicitly;
/// reaching the default case is a programming error (llvm_unreachable).
const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  // Scalar FP classes plus the D/Q tuple classes used by NEON structured
  // load/store instructions all live in the FPR bank.
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::FPR128_0to7RegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  // All 32/64-bit integer classes (including SP-capable, tail-call and
  // matrix-index variants, and the W/X sequential pairs) map to GPR.
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  // The condition-flags class has its own dedicated bank.
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}
290
291 RegisterBankInfo::InstructionMappings
getInstrAlternativeMappings(const MachineInstr & MI) const292 AArch64RegisterBankInfo::getInstrAlternativeMappings(
293 const MachineInstr &MI) const {
294 const MachineFunction &MF = *MI.getParent()->getParent();
295 const TargetSubtargetInfo &STI = MF.getSubtarget();
296 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
297 const MachineRegisterInfo &MRI = MF.getRegInfo();
298
299 switch (MI.getOpcode()) {
300 case TargetOpcode::G_OR: {
301 // 32 and 64-bit or can be mapped on either FPR or
302 // GPR for the same cost.
303 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
304 if (Size != 32 && Size != 64)
305 break;
306
307 // If the instruction has any implicit-defs or uses,
308 // do not mess with it.
309 if (MI.getNumOperands() != 3)
310 break;
311 InstructionMappings AltMappings;
312 const InstructionMapping &GPRMapping = getInstructionMapping(
313 /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
314 /*NumOperands*/ 3);
315 const InstructionMapping &FPRMapping = getInstructionMapping(
316 /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
317 /*NumOperands*/ 3);
318
319 AltMappings.push_back(&GPRMapping);
320 AltMappings.push_back(&FPRMapping);
321 return AltMappings;
322 }
323 case TargetOpcode::G_BITCAST: {
324 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
325 if (Size != 32 && Size != 64)
326 break;
327
328 // If the instruction has any implicit-defs or uses,
329 // do not mess with it.
330 if (MI.getNumOperands() != 2)
331 break;
332
333 InstructionMappings AltMappings;
334 const InstructionMapping &GPRMapping = getInstructionMapping(
335 /*ID*/ 1, /*Cost*/ 1,
336 getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
337 /*NumOperands*/ 2);
338 const InstructionMapping &FPRMapping = getInstructionMapping(
339 /*ID*/ 2, /*Cost*/ 1,
340 getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
341 /*NumOperands*/ 2);
342 const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
343 /*ID*/ 3,
344 /*Cost*/
345 copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
346 TypeSize::getFixed(Size)),
347 getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
348 /*NumOperands*/ 2);
349 const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
350 /*ID*/ 3,
351 /*Cost*/
352 copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
353 TypeSize::getFixed(Size)),
354 getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
355 /*NumOperands*/ 2);
356
357 AltMappings.push_back(&GPRMapping);
358 AltMappings.push_back(&FPRMapping);
359 AltMappings.push_back(&GPRToFPRMapping);
360 AltMappings.push_back(&FPRToGPRMapping);
361 return AltMappings;
362 }
363 case TargetOpcode::G_LOAD: {
364 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
365 if (Size != 64)
366 break;
367
368 // If the instruction has any implicit-defs or uses,
369 // do not mess with it.
370 if (MI.getNumOperands() != 2)
371 break;
372
373 InstructionMappings AltMappings;
374 const InstructionMapping &GPRMapping = getInstructionMapping(
375 /*ID*/ 1, /*Cost*/ 1,
376 getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
377 // Addresses are GPR 64-bit.
378 getValueMapping(PMI_FirstGPR, 64)}),
379 /*NumOperands*/ 2);
380 const InstructionMapping &FPRMapping = getInstructionMapping(
381 /*ID*/ 2, /*Cost*/ 1,
382 getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
383 // Addresses are GPR 64-bit.
384 getValueMapping(PMI_FirstGPR, 64)}),
385 /*NumOperands*/ 2);
386
387 AltMappings.push_back(&GPRMapping);
388 AltMappings.push_back(&FPRMapping);
389 return AltMappings;
390 }
391 default:
392 break;
393 }
394 return RegisterBankInfo::getInstrAlternativeMappings(MI);
395 }
396
applyMappingImpl(MachineIRBuilder & Builder,const OperandsMapper & OpdMapper) const397 void AArch64RegisterBankInfo::applyMappingImpl(
398 MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
399 switch (OpdMapper.getMI().getOpcode()) {
400 case TargetOpcode::G_OR:
401 case TargetOpcode::G_BITCAST:
402 case TargetOpcode::G_LOAD:
403 // Those ID must match getInstrAlternativeMappings.
404 assert((OpdMapper.getInstrMapping().getID() >= 1 &&
405 OpdMapper.getInstrMapping().getID() <= 4) &&
406 "Don't know how to handle that ID");
407 return applyDefaultMapping(OpdMapper);
408 default:
409 llvm_unreachable("Don't know how to handle that operation");
410 }
411 }
412
413 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
414 /// having only floating-point operands.
isPreISelGenericFloatingPointOpcode(unsigned Opc)415 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
416 switch (Opc) {
417 case TargetOpcode::G_FADD:
418 case TargetOpcode::G_FSUB:
419 case TargetOpcode::G_FMUL:
420 case TargetOpcode::G_FMA:
421 case TargetOpcode::G_FDIV:
422 case TargetOpcode::G_FCONSTANT:
423 case TargetOpcode::G_FPEXT:
424 case TargetOpcode::G_FPTRUNC:
425 case TargetOpcode::G_FCEIL:
426 case TargetOpcode::G_FFLOOR:
427 case TargetOpcode::G_FNEARBYINT:
428 case TargetOpcode::G_FNEG:
429 case TargetOpcode::G_FCOS:
430 case TargetOpcode::G_FSIN:
431 case TargetOpcode::G_FLOG10:
432 case TargetOpcode::G_FLOG:
433 case TargetOpcode::G_FLOG2:
434 case TargetOpcode::G_FSQRT:
435 case TargetOpcode::G_FABS:
436 case TargetOpcode::G_FEXP:
437 case TargetOpcode::G_FRINT:
438 case TargetOpcode::G_INTRINSIC_TRUNC:
439 case TargetOpcode::G_INTRINSIC_ROUND:
440 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
441 case TargetOpcode::G_FMAXNUM:
442 case TargetOpcode::G_FMINNUM:
443 case TargetOpcode::G_FMAXIMUM:
444 case TargetOpcode::G_FMINIMUM:
445 return true;
446 }
447 return false;
448 }
449
/// Build a mapping where every operand of \p MI uses the same register bank.
///
/// The bank is chosen once from operand 0: vector-typed values and generic FP
/// opcodes go to FPR, everything else to GPR. Only instructions with at most
/// three operands are supported.
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or less operands");

  // The destination (operand 0) decides bank and size for everything.
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of details, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each types.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}
492
493 /// \returns true if a given intrinsic only uses and defines FPRs.
isFPIntrinsic(const MachineRegisterInfo & MRI,const MachineInstr & MI)494 static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
495 const MachineInstr &MI) {
496 // TODO: Add more intrinsics.
497 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
498 default:
499 return false;
500 case Intrinsic::aarch64_neon_uaddlv:
501 case Intrinsic::aarch64_neon_uaddv:
502 case Intrinsic::aarch64_neon_saddv:
503 case Intrinsic::aarch64_neon_umaxv:
504 case Intrinsic::aarch64_neon_smaxv:
505 case Intrinsic::aarch64_neon_uminv:
506 case Intrinsic::aarch64_neon_sminv:
507 case Intrinsic::aarch64_neon_faddv:
508 case Intrinsic::aarch64_neon_fmaxv:
509 case Intrinsic::aarch64_neon_fminv:
510 case Intrinsic::aarch64_neon_fmaxnmv:
511 case Intrinsic::aarch64_neon_fminnmv:
512 return true;
513 case Intrinsic::aarch64_neon_saddlv: {
514 const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
515 return SrcTy.getElementType().getSizeInBits() >= 16 &&
516 SrcTy.getElementCount().getFixedValue() >= 4;
517 }
518 }
519 }
520
hasFPConstraints(const MachineInstr & MI,const MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,unsigned Depth) const521 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
522 const MachineRegisterInfo &MRI,
523 const TargetRegisterInfo &TRI,
524 unsigned Depth) const {
525 unsigned Op = MI.getOpcode();
526 if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
527 return true;
528
529 // Do we have an explicit floating point instruction?
530 if (isPreISelGenericFloatingPointOpcode(Op))
531 return true;
532
533 // No. Check if we have a copy-like instruction. If we do, then we could
534 // still be fed by floating point instructions.
535 if (Op != TargetOpcode::COPY && !MI.isPHI() &&
536 !isPreISelGenericOptimizationHint(Op))
537 return false;
538
539 // Check if we already know the register bank.
540 auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
541 if (RB == &AArch64::FPRRegBank)
542 return true;
543 if (RB == &AArch64::GPRRegBank)
544 return false;
545
546 // We don't know anything.
547 //
548 // If we have a phi, we may be able to infer that it will be assigned a FPR
549 // based off of its inputs.
550 if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
551 return false;
552
553 return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
554 return Op.isReg() &&
555 onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
556 });
557 }
558
onlyUsesFP(const MachineInstr & MI,const MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,unsigned Depth) const559 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
560 const MachineRegisterInfo &MRI,
561 const TargetRegisterInfo &TRI,
562 unsigned Depth) const {
563 switch (MI.getOpcode()) {
564 case TargetOpcode::G_FPTOSI:
565 case TargetOpcode::G_FPTOUI:
566 case TargetOpcode::G_FCMP:
567 case TargetOpcode::G_LROUND:
568 case TargetOpcode::G_LLROUND:
569 return true;
570 default:
571 break;
572 }
573 return hasFPConstraints(MI, MRI, TRI, Depth);
574 }
575
onlyDefinesFP(const MachineInstr & MI,const MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,unsigned Depth) const576 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
577 const MachineRegisterInfo &MRI,
578 const TargetRegisterInfo &TRI,
579 unsigned Depth) const {
580 switch (MI.getOpcode()) {
581 case AArch64::G_DUP:
582 case TargetOpcode::G_SITOFP:
583 case TargetOpcode::G_UITOFP:
584 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
585 case TargetOpcode::G_INSERT_VECTOR_ELT:
586 case TargetOpcode::G_BUILD_VECTOR:
587 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
588 return true;
589 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
590 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
591 case Intrinsic::aarch64_neon_ld1x2:
592 case Intrinsic::aarch64_neon_ld1x3:
593 case Intrinsic::aarch64_neon_ld1x4:
594 case Intrinsic::aarch64_neon_ld2:
595 case Intrinsic::aarch64_neon_ld2lane:
596 case Intrinsic::aarch64_neon_ld2r:
597 case Intrinsic::aarch64_neon_ld3:
598 case Intrinsic::aarch64_neon_ld3lane:
599 case Intrinsic::aarch64_neon_ld3r:
600 case Intrinsic::aarch64_neon_ld4:
601 case Intrinsic::aarch64_neon_ld4lane:
602 case Intrinsic::aarch64_neon_ld4r:
603 return true;
604 default:
605 break;
606 }
607 break;
608 default:
609 break;
610 }
611 return hasFPConstraints(MI, MRI, TRI, Depth);
612 }
613
/// Try to infer, from the IR value attached to the load's MachineMemOperand,
/// whether \p MI loads a floating-point-typed value (FP scalar or FP vector).
/// Returns false when no IR type information can be recovered.
bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  // No IR value attached: nothing to inspect, conservatively say "not FP".
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    // Infer the pointee type from the first IR load of this pointer, or from
    // the value stored through it.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  // FP only if we actually found a type and it is FP or a vector of FP.
  return EltTy && EltTy->isFPOrFPVectorTy();
}
650
651 const RegisterBankInfo::InstructionMapping &
getInstrMapping(const MachineInstr & MI) const652 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
653 const unsigned Opc = MI.getOpcode();
654
655 // Try the default logic for non-generic instructions that are either copies
656 // or already have some operands assigned to banks.
657 if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
658 Opc == TargetOpcode::G_PHI) {
659 const RegisterBankInfo::InstructionMapping &Mapping =
660 getInstrMappingImpl(MI);
661 if (Mapping.isValid())
662 return Mapping;
663 }
664
665 const MachineFunction &MF = *MI.getParent()->getParent();
666 const MachineRegisterInfo &MRI = MF.getRegInfo();
667 const TargetSubtargetInfo &STI = MF.getSubtarget();
668 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
669
670 switch (Opc) {
671 // G_{F|S|U}REM are not listed because they are not legal.
672 // Arithmetic ops.
673 case TargetOpcode::G_ADD:
674 case TargetOpcode::G_SUB:
675 case TargetOpcode::G_PTR_ADD:
676 case TargetOpcode::G_MUL:
677 case TargetOpcode::G_SDIV:
678 case TargetOpcode::G_UDIV:
679 // Bitwise ops.
680 case TargetOpcode::G_AND:
681 case TargetOpcode::G_OR:
682 case TargetOpcode::G_XOR:
683 // Floating point ops.
684 case TargetOpcode::G_FADD:
685 case TargetOpcode::G_FSUB:
686 case TargetOpcode::G_FMUL:
687 case TargetOpcode::G_FDIV:
688 case TargetOpcode::G_FMAXIMUM:
689 case TargetOpcode::G_FMINIMUM:
690 return getSameKindOfOperandsMapping(MI);
691 case TargetOpcode::G_FPEXT: {
692 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
693 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
694 return getInstructionMapping(
695 DefaultMappingID, /*Cost*/ 1,
696 getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
697 /*NumOperands*/ 2);
698 }
699 // Shifts.
700 case TargetOpcode::G_SHL:
701 case TargetOpcode::G_LSHR:
702 case TargetOpcode::G_ASHR: {
703 LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
704 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
705 if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
706 return getInstructionMapping(DefaultMappingID, 1,
707 &ValMappings[Shift64Imm], 3);
708 return getSameKindOfOperandsMapping(MI);
709 }
710 case TargetOpcode::COPY: {
711 Register DstReg = MI.getOperand(0).getReg();
712 Register SrcReg = MI.getOperand(1).getReg();
713 // Check if one of the register is not a generic register.
714 if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
715 (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
716 const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
717 const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
718 if (!DstRB)
719 DstRB = SrcRB;
720 else if (!SrcRB)
721 SrcRB = DstRB;
722 // If both RB are null that means both registers are generic.
723 // We shouldn't be here.
724 assert(DstRB && SrcRB && "Both RegBank were nullptr");
725 unsigned Size = getSizeInBits(DstReg, MRI, TRI);
726 return getInstructionMapping(
727 DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
728 getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
729 // We only care about the mapping of the destination.
730 /*NumOperands*/ 1);
731 }
732 // Both registers are generic, use G_BITCAST.
733 [[fallthrough]];
734 }
735 case TargetOpcode::G_BITCAST: {
736 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
737 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
738 unsigned Size = DstTy.getSizeInBits();
739 bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
740 bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
741 const RegisterBank &DstRB =
742 DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
743 const RegisterBank &SrcRB =
744 SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
745 return getInstructionMapping(
746 DefaultMappingID, copyCost(DstRB, SrcRB, TypeSize::getFixed(Size)),
747 getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
748 // We only care about the mapping of the destination for COPY.
749 /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
750 }
751 default:
752 break;
753 }
754
755 unsigned NumOperands = MI.getNumOperands();
756
757 // Track the size and bank of each register. We don't do partial mappings.
758 SmallVector<unsigned, 4> OpSize(NumOperands);
759 SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
760 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
761 auto &MO = MI.getOperand(Idx);
762 if (!MO.isReg() || !MO.getReg())
763 continue;
764
765 LLT Ty = MRI.getType(MO.getReg());
766 if (!Ty.isValid())
767 continue;
768 OpSize[Idx] = Ty.getSizeInBits();
769
770 // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
771 // For floating-point instructions, scalars go in FPRs.
772 if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
773 Ty.getSizeInBits() > 64)
774 OpRegBankIdx[Idx] = PMI_FirstFPR;
775 else
776 OpRegBankIdx[Idx] = PMI_FirstGPR;
777 }
778
779 unsigned Cost = 1;
780 // Some of the floating-point instructions have mixed GPR and FPR operands:
781 // fine-tune the computed mapping.
782 switch (Opc) {
783 case AArch64::G_DUP: {
784 Register ScalarReg = MI.getOperand(1).getReg();
785 LLT ScalarTy = MRI.getType(ScalarReg);
786 auto ScalarDef = MRI.getVRegDef(ScalarReg);
787 // We want to select dup(load) into LD1R.
788 if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
789 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
790 // s8 is an exception for G_DUP, which we always want on gpr.
791 else if (ScalarTy.getSizeInBits() != 8 &&
792 (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
793 onlyDefinesFP(*ScalarDef, MRI, TRI)))
794 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
795 else
796 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
797 break;
798 }
799 case TargetOpcode::G_TRUNC: {
800 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
801 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
802 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
803 break;
804 }
805 case TargetOpcode::G_SITOFP:
806 case TargetOpcode::G_UITOFP: {
807 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
808 break;
809 // Integer to FP conversions don't necessarily happen between GPR -> FPR
810 // regbanks. They can also be done within an FPR register.
811 Register SrcReg = MI.getOperand(1).getReg();
812 if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
813 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
814 else
815 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
816 break;
817 }
818 case TargetOpcode::G_FPTOSI:
819 case TargetOpcode::G_FPTOUI:
820 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
821 break;
822 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
823 break;
824 case TargetOpcode::G_FCMP: {
825 // If the result is a vector, it must use a FPR.
826 AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
827 MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
828 : PMI_FirstGPR;
829 OpRegBankIdx = {Idx0,
830 /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
831 break;
832 }
833 case TargetOpcode::G_BITCAST:
834 // This is going to be a cross register bank copy and this is expensive.
835 if (OpRegBankIdx[0] != OpRegBankIdx[1])
836 Cost = copyCost(
837 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
838 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
839 TypeSize::getFixed(OpSize[0]));
840 break;
841 case TargetOpcode::G_LOAD: {
842 // Loading in vector unit is slightly more expensive.
843 // This is actually only true for the LD1R and co instructions,
844 // but anyway for the fast mode this number does not matter and
845 // for the greedy mode the cost of the cross bank copy will
846 // offset this number.
847 // FIXME: Should be derived from the scheduling model.
848 if (OpRegBankIdx[0] != PMI_FirstGPR) {
849 Cost = 2;
850 break;
851 }
852
853 if (cast<GLoad>(MI).isAtomic()) {
854 // Atomics always use GPR destinations. Don't refine any further.
855 OpRegBankIdx[0] = PMI_FirstGPR;
856 break;
857 }
858
859 // Try to guess the type of the load from the MMO.
860 if (isLoadFromFPType(MI)) {
861 OpRegBankIdx[0] = PMI_FirstFPR;
862 break;
863 }
864
865 // Check if that load feeds fp instructions.
866 // In that case, we want the default mapping to be on FPR
867 // instead of blind map every scalar to GPR.
868 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
869 [&](const MachineInstr &UseMI) {
870 // If we have at least one direct use in a FP instruction,
871 // assume this was a floating point load in the IR. If it was
872 // not, we would have had a bitcast before reaching that
873 // instruction.
874 //
875 // Int->FP conversion operations are also captured in
876 // onlyDefinesFP().
877 return onlyUsesFP(UseMI, MRI, TRI) ||
878 onlyDefinesFP(UseMI, MRI, TRI);
879 }))
880 OpRegBankIdx[0] = PMI_FirstFPR;
881 break;
882 }
883 case TargetOpcode::G_STORE:
884 // Check if that store is fed by fp instructions.
885 if (OpRegBankIdx[0] == PMI_FirstGPR) {
886 Register VReg = MI.getOperand(0).getReg();
887 if (!VReg)
888 break;
889 MachineInstr *DefMI = MRI.getVRegDef(VReg);
890 if (onlyDefinesFP(*DefMI, MRI, TRI))
891 OpRegBankIdx[0] = PMI_FirstFPR;
892 break;
893 }
894 break;
895 case TargetOpcode::G_INDEXED_STORE:
896 if (OpRegBankIdx[1] == PMI_FirstGPR) {
897 Register VReg = MI.getOperand(1).getReg();
898 if (!VReg)
899 break;
900 MachineInstr *DefMI = MRI.getVRegDef(VReg);
901 if (onlyDefinesFP(*DefMI, MRI, TRI))
902 OpRegBankIdx[1] = PMI_FirstFPR;
903 break;
904 }
905 break;
906 case TargetOpcode::G_INDEXED_SEXTLOAD:
907 case TargetOpcode::G_INDEXED_ZEXTLOAD:
908 // These should always be GPR.
909 OpRegBankIdx[0] = PMI_FirstGPR;
910 break;
911 case TargetOpcode::G_INDEXED_LOAD: {
912 if (isLoadFromFPType(MI))
913 OpRegBankIdx[0] = PMI_FirstFPR;
914 break;
915 }
916 case TargetOpcode::G_SELECT: {
917 // If the destination is FPR, preserve that.
918 if (OpRegBankIdx[0] != PMI_FirstGPR)
919 break;
920
921 // If we're taking in vectors, we have no choice but to put everything on
922 // FPRs, except for the condition. The condition must always be on a GPR.
923 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
924 if (SrcTy.isVector()) {
925 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
926 break;
927 }
928
929 // Try to minimize the number of copies. If we have more floating point
930 // constrained values than not, then we'll put everything on FPR. Otherwise,
931 // everything has to be on GPR.
932 unsigned NumFP = 0;
933
934 // Check if the uses of the result always produce floating point values.
935 //
936 // For example:
937 //
938 // %z = G_SELECT %cond %x %y
939 // fpr = G_FOO %z ...
940 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
941 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
942 ++NumFP;
943
944 // Check if the defs of the source values always produce floating point
945 // values.
946 //
947 // For example:
948 //
949 // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
950 // %z = G_SELECT %cond %x %y
951 //
952 // Also check whether or not the sources have already been decided to be
953 // FPR. Keep track of this.
954 //
955 // This doesn't check the condition, since it's just whatever is in NZCV.
956 // This isn't passed explicitly in a register to fcsel/csel.
957 for (unsigned Idx = 2; Idx < 4; ++Idx) {
958 Register VReg = MI.getOperand(Idx).getReg();
959 MachineInstr *DefMI = MRI.getVRegDef(VReg);
960 if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
961 onlyDefinesFP(*DefMI, MRI, TRI))
962 ++NumFP;
963 }
964
965 // If we have more FP constraints than not, then move everything over to
966 // FPR.
967 if (NumFP >= 2)
968 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
969
970 break;
971 }
972 case TargetOpcode::G_UNMERGE_VALUES: {
973 // If the first operand belongs to a FPR register bank, then make sure that
974 // we preserve that.
975 if (OpRegBankIdx[0] != PMI_FirstGPR)
976 break;
977
978 LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
979 // UNMERGE into scalars from a vector should always use FPR.
980 // Likewise if any of the uses are FP instructions.
981 if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
982 any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
983 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
984 // Set the register bank of every operand to FPR.
985 for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
986 Idx < NumOperands; ++Idx)
987 OpRegBankIdx[Idx] = PMI_FirstFPR;
988 }
989 break;
990 }
991 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
992 // Destination and source need to be FPRs.
993 OpRegBankIdx[0] = PMI_FirstFPR;
994 OpRegBankIdx[1] = PMI_FirstFPR;
995
996 // Index needs to be a GPR.
997 OpRegBankIdx[2] = PMI_FirstGPR;
998 break;
999 case TargetOpcode::G_INSERT_VECTOR_ELT:
1000 OpRegBankIdx[0] = PMI_FirstFPR;
1001 OpRegBankIdx[1] = PMI_FirstFPR;
1002
1003 // The element may be either a GPR or FPR. Preserve that behaviour.
1004 if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
1005 OpRegBankIdx[2] = PMI_FirstFPR;
1006 else
1007 OpRegBankIdx[2] = PMI_FirstGPR;
1008
1009 // Index needs to be a GPR.
1010 OpRegBankIdx[3] = PMI_FirstGPR;
1011 break;
1012 case TargetOpcode::G_EXTRACT: {
1013 // For s128 sources we have to use fpr unless we know otherwise.
1014 auto Src = MI.getOperand(1).getReg();
1015 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
1016 if (SrcTy.getSizeInBits() != 128)
1017 break;
1018 auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
1019 ? PMI_FirstGPR
1020 : PMI_FirstFPR;
1021 OpRegBankIdx[0] = Idx;
1022 OpRegBankIdx[1] = Idx;
1023 break;
1024 }
1025 case TargetOpcode::G_BUILD_VECTOR: {
1026 // If the first source operand belongs to a FPR register bank, then make
1027 // sure that we preserve that.
1028 if (OpRegBankIdx[1] != PMI_FirstGPR)
1029 break;
1030 Register VReg = MI.getOperand(1).getReg();
1031 if (!VReg)
1032 break;
1033
1034 // Get the instruction that defined the source operand reg, and check if
1035 // it's a floating point operation. Or, if it's a type like s16 which
1036 // doesn't have a exact size gpr register class. The exception is if the
1037 // build_vector has all constant operands, which may be better to leave as
1038 // gpr without copies, so it can be matched in imported patterns.
1039 MachineInstr *DefMI = MRI.getVRegDef(VReg);
1040 unsigned DefOpc = DefMI->getOpcode();
1041 const LLT SrcTy = MRI.getType(VReg);
1042 if (all_of(MI.operands(), [&](const MachineOperand &Op) {
1043 return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
1044 TargetOpcode::G_CONSTANT;
1045 }))
1046 break;
1047 if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
1048 SrcTy.getSizeInBits() < 32 ||
1049 getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
1050 // Have a floating point op.
1051 // Make sure every operand gets mapped to a FPR register class.
1052 unsigned NumOperands = MI.getNumOperands();
1053 for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
1054 OpRegBankIdx[Idx] = PMI_FirstFPR;
1055 }
1056 break;
1057 }
1058 case TargetOpcode::G_VECREDUCE_FADD:
1059 case TargetOpcode::G_VECREDUCE_FMUL:
1060 case TargetOpcode::G_VECREDUCE_FMAX:
1061 case TargetOpcode::G_VECREDUCE_FMIN:
1062 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
1063 case TargetOpcode::G_VECREDUCE_FMINIMUM:
1064 case TargetOpcode::G_VECREDUCE_ADD:
1065 case TargetOpcode::G_VECREDUCE_MUL:
1066 case TargetOpcode::G_VECREDUCE_AND:
1067 case TargetOpcode::G_VECREDUCE_OR:
1068 case TargetOpcode::G_VECREDUCE_XOR:
1069 case TargetOpcode::G_VECREDUCE_SMAX:
1070 case TargetOpcode::G_VECREDUCE_SMIN:
1071 case TargetOpcode::G_VECREDUCE_UMAX:
1072 case TargetOpcode::G_VECREDUCE_UMIN:
1073 // Reductions produce a scalar value from a vector, the scalar should be on
1074 // FPR bank.
1075 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1076 break;
1077 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
1078 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
1079 // These reductions also take a scalar accumulator input.
1080 // Assign them FPR for now.
1081 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
1082 break;
1083 case TargetOpcode::G_INTRINSIC:
1084 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
1085 // Check if we know that the intrinsic has any constraints on its register
1086 // banks. If it does, then update the mapping accordingly.
1087 unsigned Idx = 0;
1088 if (onlyDefinesFP(MI, MRI, TRI))
1089 for (const auto &Op : MI.defs()) {
1090 if (Op.isReg())
1091 OpRegBankIdx[Idx] = PMI_FirstFPR;
1092 ++Idx;
1093 }
1094 else
1095 Idx += MI.getNumExplicitDefs();
1096
1097 if (onlyUsesFP(MI, MRI, TRI))
1098 for (const auto &Op : MI.explicit_uses()) {
1099 if (Op.isReg())
1100 OpRegBankIdx[Idx] = PMI_FirstFPR;
1101 ++Idx;
1102 }
1103 break;
1104 }
1105 case TargetOpcode::G_LROUND:
1106 case TargetOpcode::G_LLROUND: {
1107 // Source is always floating point and destination is always integer.
1108 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
1109 break;
1110 }
1111 }
1112
1113 // Finally construct the computed mapping.
1114 SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
1115 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
1116 if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
1117 LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
1118 if (!Ty.isValid())
1119 continue;
1120 auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
1121 if (!Mapping->isValid())
1122 return getInvalidInstructionMapping();
1123
1124 OpdsMapping[Idx] = Mapping;
1125 }
1126 }
1127
1128 return getInstructionMapping(DefaultMappingID, Cost,
1129 getOperandsMapping(OpdsMapping), NumOperands);
1130 }
1131