1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the RegisterBankInfo class for 10 /// AArch64. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64RegisterBankInfo.h" 15 #include "AArch64InstrInfo.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/CodeGen/GlobalISel/RegisterBank.h" 19 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" 20 #include "llvm/CodeGen/GlobalISel/Utils.h" 21 #include "llvm/CodeGen/LowLevelType.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstr.h" 24 #include "llvm/CodeGen/MachineOperand.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/TargetOpcodes.h" 27 #include "llvm/CodeGen/TargetRegisterInfo.h" 28 #include "llvm/CodeGen/TargetSubtargetInfo.h" 29 #include "llvm/IR/IntrinsicsAArch64.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include <algorithm> 32 #include <cassert> 33 34 #define GET_TARGET_REGBANK_IMPL 35 #include "AArch64GenRegisterBank.inc" 36 37 // This file will be TableGen'ed at some point. 38 #include "AArch64GenRegisterBankInfo.def" 39 40 using namespace llvm; 41 42 AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) 43 : AArch64GenRegisterBankInfo() { 44 static llvm::once_flag InitializeRegisterBankFlag; 45 46 static auto InitializeRegisterBankOnce = [&]() { 47 // We have only one set of register banks, whatever the subtarget 48 // is. Therefore, the initialization of the RegBanks table should be 49 // done only once. Indeed the table of all register banks 50 // (AArch64::RegBanks) is unique in the compiler. At some point, it 51 // will get tablegen'ed and the whole constructor becomes empty. 52 53 const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID); 54 (void)RBGPR; 55 assert(&AArch64::GPRRegBank == &RBGPR && 56 "The order in RegBanks is messed up"); 57 58 const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID); 59 (void)RBFPR; 60 assert(&AArch64::FPRRegBank == &RBFPR && 61 "The order in RegBanks is messed up"); 62 63 const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID); 64 (void)RBCCR; 65 assert(&AArch64::CCRegBank == &RBCCR && 66 "The order in RegBanks is messed up"); 67 68 // The GPR register bank is fully defined by all the registers in 69 // GR64all + its subclasses. 70 assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) && 71 "Subclass not added?"); 72 assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit"); 73 74 // The FPR register bank is fully defined by all the registers in 75 // GR64all + its subclasses. 76 assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) && 77 "Subclass not added?"); 78 assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) && 79 "Subclass not added?"); 80 assert(RBFPR.getSize() == 512 && 81 "FPRs should hold up to 512-bit via QQQQ sequence"); 82 83 assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) && 84 "Class not added?"); 85 assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit"); 86 87 // Check that the TableGen'ed like file is in sync we our expectations. 88 // First, the Idx. 89 assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR, 90 {PMI_GPR32, PMI_GPR64, PMI_GPR128}) && 91 "PartialMappingIdx's are incorrectly ordered"); 92 assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR, 93 {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128, 94 PMI_FPR256, PMI_FPR512}) && 95 "PartialMappingIdx's are incorrectly ordered"); 96 // Now, the content. 97 // Check partial mapping. 98 #define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \ 99 do { \ 100 assert( \ 101 checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \ 102 #Idx " is incorrectly initialized"); \ 103 } while (false) 104 105 CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR); 106 CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR); 107 CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR); 108 CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR); 109 CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR); 110 CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR); 111 CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR); 112 CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR); 113 CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR); 114 115 // Check value mapping. 116 #define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \ 117 do { \ 118 assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \ 119 PartialMappingIdx::PMI_First##RBName, Size, \ 120 Offset) && \ 121 #RBName #Size " " #Offset " is incorrectly initialized"); \ 122 } while (false) 123 124 #define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0) 125 126 CHECK_VALUEMAP(GPR, 32); 127 CHECK_VALUEMAP(GPR, 64); 128 CHECK_VALUEMAP(GPR, 128); 129 CHECK_VALUEMAP(FPR, 16); 130 CHECK_VALUEMAP(FPR, 32); 131 CHECK_VALUEMAP(FPR, 64); 132 CHECK_VALUEMAP(FPR, 128); 133 CHECK_VALUEMAP(FPR, 256); 134 CHECK_VALUEMAP(FPR, 512); 135 136 // Check the value mapping for 3-operands instructions where all the operands 137 // map to the same value mapping. 138 #define CHECK_VALUEMAP_3OPS(RBName, Size) \ 139 do { \ 140 CHECK_VALUEMAP_IMPL(RBName, Size, 0); \ 141 CHECK_VALUEMAP_IMPL(RBName, Size, 1); \ 142 CHECK_VALUEMAP_IMPL(RBName, Size, 2); \ 143 } while (false) 144 145 CHECK_VALUEMAP_3OPS(GPR, 32); 146 CHECK_VALUEMAP_3OPS(GPR, 64); 147 CHECK_VALUEMAP_3OPS(GPR, 128); 148 CHECK_VALUEMAP_3OPS(FPR, 32); 149 CHECK_VALUEMAP_3OPS(FPR, 64); 150 CHECK_VALUEMAP_3OPS(FPR, 128); 151 CHECK_VALUEMAP_3OPS(FPR, 256); 152 CHECK_VALUEMAP_3OPS(FPR, 512); 153 154 #define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \ 155 do { \ 156 unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \ 157 unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \ 158 (void)PartialMapDstIdx; \ 159 (void)PartialMapSrcIdx; \ 160 const ValueMapping *Map = getCopyMapping( \ 161 AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \ 162 (void)Map; \ 163 assert(Map[0].BreakDown == \ 164 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ 165 Map[0].NumBreakDowns == 1 && #RBNameDst #Size \ 166 " Dst is incorrectly initialized"); \ 167 assert(Map[1].BreakDown == \ 168 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ 169 Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \ 170 " Src is incorrectly initialized"); \ 171 \ 172 } while (false) 173 174 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32); 175 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32); 176 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64); 177 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64); 178 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32); 179 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32); 180 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64); 181 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64); 182 183 #define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \ 184 do { \ 185 unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \ 186 unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \ 187 (void)PartialMapDstIdx; \ 188 (void)PartialMapSrcIdx; \ 189 const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \ 190 (void)Map; \ 191 assert(Map[0].BreakDown == \ 192 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ 193 Map[0].NumBreakDowns == 1 && "FPR" #DstSize \ 194 " Dst is incorrectly initialized"); \ 195 assert(Map[1].BreakDown == \ 196 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ 197 Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \ 198 " Src is incorrectly initialized"); \ 199 \ 200 } while (false) 201 202 CHECK_VALUEMAP_FPEXT(32, 16); 203 CHECK_VALUEMAP_FPEXT(64, 16); 204 CHECK_VALUEMAP_FPEXT(64, 32); 205 CHECK_VALUEMAP_FPEXT(128, 64); 206 207 assert(verify(TRI) && "Invalid register bank information"); 208 }; 209 210 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce); 211 } 212 213 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A, 214 const RegisterBank &B, 215 unsigned Size) const { 216 // What do we do with different size? 217 // copy are same size. 218 // Will introduce other hooks for different size: 219 // * extract cost. 220 // * build_sequence cost. 221 222 // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV. 223 // FIXME: This should be deduced from the scheduling model. 224 if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank) 225 // FMOVXDr or FMOVWSr. 226 return 5; 227 if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank) 228 // FMOVDXr or FMOVSWr. 229 return 4; 230 231 return RegisterBankInfo::copyCost(A, B, Size); 232 } 233 234 const RegisterBank & 235 AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, 236 LLT) const { 237 switch (RC.getID()) { 238 case AArch64::FPR8RegClassID: 239 case AArch64::FPR16RegClassID: 240 case AArch64::FPR16_loRegClassID: 241 case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID: 242 case AArch64::FPR32RegClassID: 243 case AArch64::FPR64RegClassID: 244 case AArch64::FPR64_loRegClassID: 245 case AArch64::FPR128RegClassID: 246 case AArch64::FPR128_loRegClassID: 247 case AArch64::DDRegClassID: 248 case AArch64::DDDRegClassID: 249 case AArch64::DDDDRegClassID: 250 case AArch64::QQRegClassID: 251 case AArch64::QQQRegClassID: 252 case AArch64::QQQQRegClassID: 253 return getRegBank(AArch64::FPRRegBankID); 254 case AArch64::GPR32commonRegClassID: 255 case AArch64::GPR32RegClassID: 256 case AArch64::GPR32spRegClassID: 257 case AArch64::GPR32sponlyRegClassID: 258 case AArch64::GPR32argRegClassID: 259 case AArch64::GPR32allRegClassID: 260 case AArch64::GPR64commonRegClassID: 261 case AArch64::GPR64RegClassID: 262 case AArch64::GPR64spRegClassID: 263 case AArch64::GPR64sponlyRegClassID: 264 case AArch64::GPR64argRegClassID: 265 case AArch64::GPR64allRegClassID: 266 case AArch64::GPR64noipRegClassID: 267 case AArch64::GPR64common_and_GPR64noipRegClassID: 268 case AArch64::GPR64noip_and_tcGPR64RegClassID: 269 case AArch64::tcGPR64RegClassID: 270 case AArch64::rtcGPR64RegClassID: 271 case AArch64::WSeqPairsClassRegClassID: 272 case AArch64::XSeqPairsClassRegClassID: 273 case AArch64::MatrixIndexGPR32_12_15RegClassID: 274 return getRegBank(AArch64::GPRRegBankID); 275 case AArch64::CCRRegClassID: 276 return getRegBank(AArch64::CCRegBankID); 277 default: 278 llvm_unreachable("Register class not supported"); 279 } 280 } 281 282 RegisterBankInfo::InstructionMappings 283 AArch64RegisterBankInfo::getInstrAlternativeMappings( 284 const MachineInstr &MI) const { 285 const MachineFunction &MF = *MI.getParent()->getParent(); 286 const TargetSubtargetInfo &STI = MF.getSubtarget(); 287 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 288 const MachineRegisterInfo &MRI = MF.getRegInfo(); 289 290 switch (MI.getOpcode()) { 291 case TargetOpcode::G_OR: { 292 // 32 and 64-bit or can be mapped on either FPR or 293 // GPR for the same cost. 294 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 295 if (Size != 32 && Size != 64) 296 break; 297 298 // If the instruction has any implicit-defs or uses, 299 // do not mess with it. 300 if (MI.getNumOperands() != 3) 301 break; 302 InstructionMappings AltMappings; 303 const InstructionMapping &GPRMapping = getInstructionMapping( 304 /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size), 305 /*NumOperands*/ 3); 306 const InstructionMapping &FPRMapping = getInstructionMapping( 307 /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size), 308 /*NumOperands*/ 3); 309 310 AltMappings.push_back(&GPRMapping); 311 AltMappings.push_back(&FPRMapping); 312 return AltMappings; 313 } 314 case TargetOpcode::G_BITCAST: { 315 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 316 if (Size != 32 && Size != 64) 317 break; 318 319 // If the instruction has any implicit-defs or uses, 320 // do not mess with it. 321 if (MI.getNumOperands() != 2) 322 break; 323 324 InstructionMappings AltMappings; 325 const InstructionMapping &GPRMapping = getInstructionMapping( 326 /*ID*/ 1, /*Cost*/ 1, 327 getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size), 328 /*NumOperands*/ 2); 329 const InstructionMapping &FPRMapping = getInstructionMapping( 330 /*ID*/ 2, /*Cost*/ 1, 331 getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size), 332 /*NumOperands*/ 2); 333 const InstructionMapping &GPRToFPRMapping = getInstructionMapping( 334 /*ID*/ 3, 335 /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), 336 getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size), 337 /*NumOperands*/ 2); 338 const InstructionMapping &FPRToGPRMapping = getInstructionMapping( 339 /*ID*/ 3, 340 /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), 341 getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size), 342 /*NumOperands*/ 2); 343 344 AltMappings.push_back(&GPRMapping); 345 AltMappings.push_back(&FPRMapping); 346 AltMappings.push_back(&GPRToFPRMapping); 347 AltMappings.push_back(&FPRToGPRMapping); 348 return AltMappings; 349 } 350 case TargetOpcode::G_LOAD: { 351 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 352 if (Size != 64) 353 break; 354 355 // If the instruction has any implicit-defs or uses, 356 // do not mess with it. 357 if (MI.getNumOperands() != 2) 358 break; 359 360 InstructionMappings AltMappings; 361 const InstructionMapping &GPRMapping = getInstructionMapping( 362 /*ID*/ 1, /*Cost*/ 1, 363 getOperandsMapping({getValueMapping(PMI_FirstGPR, Size), 364 // Addresses are GPR 64-bit. 365 getValueMapping(PMI_FirstGPR, 64)}), 366 /*NumOperands*/ 2); 367 const InstructionMapping &FPRMapping = getInstructionMapping( 368 /*ID*/ 2, /*Cost*/ 1, 369 getOperandsMapping({getValueMapping(PMI_FirstFPR, Size), 370 // Addresses are GPR 64-bit. 371 getValueMapping(PMI_FirstGPR, 64)}), 372 /*NumOperands*/ 2); 373 374 AltMappings.push_back(&GPRMapping); 375 AltMappings.push_back(&FPRMapping); 376 return AltMappings; 377 } 378 default: 379 break; 380 } 381 return RegisterBankInfo::getInstrAlternativeMappings(MI); 382 } 383 384 void AArch64RegisterBankInfo::applyMappingImpl( 385 const OperandsMapper &OpdMapper) const { 386 switch (OpdMapper.getMI().getOpcode()) { 387 case TargetOpcode::G_OR: 388 case TargetOpcode::G_BITCAST: 389 case TargetOpcode::G_LOAD: 390 // Those ID must match getInstrAlternativeMappings. 391 assert((OpdMapper.getInstrMapping().getID() >= 1 && 392 OpdMapper.getInstrMapping().getID() <= 4) && 393 "Don't know how to handle that ID"); 394 return applyDefaultMapping(OpdMapper); 395 default: 396 llvm_unreachable("Don't know how to handle that operation"); 397 } 398 } 399 400 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, 401 /// having only floating-point operands. 402 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { 403 switch (Opc) { 404 case TargetOpcode::G_FADD: 405 case TargetOpcode::G_FSUB: 406 case TargetOpcode::G_FMUL: 407 case TargetOpcode::G_FMA: 408 case TargetOpcode::G_FDIV: 409 case TargetOpcode::G_FCONSTANT: 410 case TargetOpcode::G_FPEXT: 411 case TargetOpcode::G_FPTRUNC: 412 case TargetOpcode::G_FCEIL: 413 case TargetOpcode::G_FFLOOR: 414 case TargetOpcode::G_FNEARBYINT: 415 case TargetOpcode::G_FNEG: 416 case TargetOpcode::G_FCOS: 417 case TargetOpcode::G_FSIN: 418 case TargetOpcode::G_FLOG10: 419 case TargetOpcode::G_FLOG: 420 case TargetOpcode::G_FLOG2: 421 case TargetOpcode::G_FSQRT: 422 case TargetOpcode::G_FABS: 423 case TargetOpcode::G_FEXP: 424 case TargetOpcode::G_FRINT: 425 case TargetOpcode::G_INTRINSIC_TRUNC: 426 case TargetOpcode::G_INTRINSIC_ROUND: 427 return true; 428 } 429 return false; 430 } 431 432 const RegisterBankInfo::InstructionMapping & 433 AArch64RegisterBankInfo::getSameKindOfOperandsMapping( 434 const MachineInstr &MI) const { 435 const unsigned Opc = MI.getOpcode(); 436 const MachineFunction &MF = *MI.getParent()->getParent(); 437 const MachineRegisterInfo &MRI = MF.getRegInfo(); 438 439 unsigned NumOperands = MI.getNumOperands(); 440 assert(NumOperands <= 3 && 441 "This code is for instructions with 3 or less operands"); 442 443 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 444 unsigned Size = Ty.getSizeInBits(); 445 bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc); 446 447 PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR; 448 449 #ifndef NDEBUG 450 // Make sure all the operands are using similar size and type. 451 // Should probably be checked by the machine verifier. 452 // This code won't catch cases where the number of lanes is 453 // different between the operands. 454 // If we want to go to that level of details, it is probably 455 // best to check that the types are the same, period. 456 // Currently, we just check that the register banks are the same 457 // for each types. 458 for (unsigned Idx = 1; Idx != NumOperands; ++Idx) { 459 LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg()); 460 assert( 461 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset( 462 RBIdx, OpTy.getSizeInBits()) == 463 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) && 464 "Operand has incompatible size"); 465 bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc); 466 (void)OpIsFPR; 467 assert(IsFPR == OpIsFPR && "Operand has incompatible type"); 468 } 469 #endif // End NDEBUG. 470 471 return getInstructionMapping(DefaultMappingID, 1, 472 getValueMapping(RBIdx, Size), NumOperands); 473 } 474 475 /// \returns true if a given intrinsic \p ID only uses and defines FPRs. 476 static bool isFPIntrinsic(unsigned ID) { 477 // TODO: Add more intrinsics. 478 switch (ID) { 479 default: 480 return false; 481 case Intrinsic::aarch64_neon_uaddlv: 482 return true; 483 } 484 } 485 486 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, 487 const MachineRegisterInfo &MRI, 488 const TargetRegisterInfo &TRI, 489 unsigned Depth) const { 490 unsigned Op = MI.getOpcode(); 491 if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID())) 492 return true; 493 494 // Do we have an explicit floating point instruction? 495 if (isPreISelGenericFloatingPointOpcode(Op)) 496 return true; 497 498 // No. Check if we have a copy-like instruction. If we do, then we could 499 // still be fed by floating point instructions. 500 if (Op != TargetOpcode::COPY && !MI.isPHI() && 501 !isPreISelGenericOptimizationHint(Op)) 502 return false; 503 504 // Check if we already know the register bank. 505 auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); 506 if (RB == &AArch64::FPRRegBank) 507 return true; 508 if (RB == &AArch64::GPRRegBank) 509 return false; 510 511 // We don't know anything. 512 // 513 // If we have a phi, we may be able to infer that it will be assigned a FPR 514 // based off of its inputs. 515 if (!MI.isPHI() || Depth > MaxFPRSearchDepth) 516 return false; 517 518 return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { 519 return Op.isReg() && 520 onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); 521 }); 522 } 523 524 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, 525 const MachineRegisterInfo &MRI, 526 const TargetRegisterInfo &TRI, 527 unsigned Depth) const { 528 switch (MI.getOpcode()) { 529 case TargetOpcode::G_FPTOSI: 530 case TargetOpcode::G_FPTOUI: 531 case TargetOpcode::G_FCMP: 532 return true; 533 default: 534 break; 535 } 536 return hasFPConstraints(MI, MRI, TRI, Depth); 537 } 538 539 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, 540 const MachineRegisterInfo &MRI, 541 const TargetRegisterInfo &TRI, 542 unsigned Depth) const { 543 switch (MI.getOpcode()) { 544 case AArch64::G_DUP: 545 case TargetOpcode::G_SITOFP: 546 case TargetOpcode::G_UITOFP: 547 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 548 case TargetOpcode::G_INSERT_VECTOR_ELT: 549 case TargetOpcode::G_BUILD_VECTOR: 550 case TargetOpcode::G_BUILD_VECTOR_TRUNC: 551 return true; 552 default: 553 break; 554 } 555 return hasFPConstraints(MI, MRI, TRI, Depth); 556 } 557 558 const RegisterBankInfo::InstructionMapping & 559 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { 560 const unsigned Opc = MI.getOpcode(); 561 562 // Try the default logic for non-generic instructions that are either copies 563 // or already have some operands assigned to banks. 564 if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) || 565 Opc == TargetOpcode::G_PHI) { 566 const RegisterBankInfo::InstructionMapping &Mapping = 567 getInstrMappingImpl(MI); 568 if (Mapping.isValid()) 569 return Mapping; 570 } 571 572 const MachineFunction &MF = *MI.getParent()->getParent(); 573 const MachineRegisterInfo &MRI = MF.getRegInfo(); 574 const TargetSubtargetInfo &STI = MF.getSubtarget(); 575 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 576 577 switch (Opc) { 578 // G_{F|S|U}REM are not listed because they are not legal. 579 // Arithmetic ops. 580 case TargetOpcode::G_ADD: 581 case TargetOpcode::G_SUB: 582 case TargetOpcode::G_PTR_ADD: 583 case TargetOpcode::G_MUL: 584 case TargetOpcode::G_SDIV: 585 case TargetOpcode::G_UDIV: 586 // Bitwise ops. 587 case TargetOpcode::G_AND: 588 case TargetOpcode::G_OR: 589 case TargetOpcode::G_XOR: 590 // Floating point ops. 591 case TargetOpcode::G_FADD: 592 case TargetOpcode::G_FSUB: 593 case TargetOpcode::G_FMUL: 594 case TargetOpcode::G_FDIV: 595 return getSameKindOfOperandsMapping(MI); 596 case TargetOpcode::G_FPEXT: { 597 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 598 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 599 return getInstructionMapping( 600 DefaultMappingID, /*Cost*/ 1, 601 getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()), 602 /*NumOperands*/ 2); 603 } 604 // Shifts. 605 case TargetOpcode::G_SHL: 606 case TargetOpcode::G_LSHR: 607 case TargetOpcode::G_ASHR: { 608 LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg()); 609 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 610 if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32) 611 return getInstructionMapping(DefaultMappingID, 1, 612 &ValMappings[Shift64Imm], 3); 613 return getSameKindOfOperandsMapping(MI); 614 } 615 case TargetOpcode::COPY: { 616 Register DstReg = MI.getOperand(0).getReg(); 617 Register SrcReg = MI.getOperand(1).getReg(); 618 // Check if one of the register is not a generic register. 619 if ((Register::isPhysicalRegister(DstReg) || 620 !MRI.getType(DstReg).isValid()) || 621 (Register::isPhysicalRegister(SrcReg) || 622 !MRI.getType(SrcReg).isValid())) { 623 const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI); 624 const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI); 625 if (!DstRB) 626 DstRB = SrcRB; 627 else if (!SrcRB) 628 SrcRB = DstRB; 629 // If both RB are null that means both registers are generic. 630 // We shouldn't be here. 631 assert(DstRB && SrcRB && "Both RegBank were nullptr"); 632 unsigned Size = getSizeInBits(DstReg, MRI, TRI); 633 return getInstructionMapping( 634 DefaultMappingID, copyCost(*DstRB, *SrcRB, Size), 635 getCopyMapping(DstRB->getID(), SrcRB->getID(), Size), 636 // We only care about the mapping of the destination. 637 /*NumOperands*/ 1); 638 } 639 // Both registers are generic, use G_BITCAST. 640 LLVM_FALLTHROUGH; 641 } 642 case TargetOpcode::G_BITCAST: { 643 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 644 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 645 unsigned Size = DstTy.getSizeInBits(); 646 bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64; 647 bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64; 648 const RegisterBank &DstRB = 649 DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; 650 const RegisterBank &SrcRB = 651 SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; 652 return getInstructionMapping( 653 DefaultMappingID, copyCost(DstRB, SrcRB, Size), 654 getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), 655 // We only care about the mapping of the destination for COPY. 656 /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1); 657 } 658 default: 659 break; 660 } 661 662 unsigned NumOperands = MI.getNumOperands(); 663 664 // Track the size and bank of each register. We don't do partial mappings. 665 SmallVector<unsigned, 4> OpSize(NumOperands); 666 SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands); 667 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { 668 auto &MO = MI.getOperand(Idx); 669 if (!MO.isReg() || !MO.getReg()) 670 continue; 671 672 LLT Ty = MRI.getType(MO.getReg()); 673 OpSize[Idx] = Ty.getSizeInBits(); 674 675 // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs. 676 // For floating-point instructions, scalars go in FPRs. 677 if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) || 678 Ty.getSizeInBits() > 64) 679 OpRegBankIdx[Idx] = PMI_FirstFPR; 680 else 681 OpRegBankIdx[Idx] = PMI_FirstGPR; 682 } 683 684 unsigned Cost = 1; 685 // Some of the floating-point instructions have mixed GPR and FPR operands: 686 // fine-tune the computed mapping. 687 switch (Opc) { 688 case AArch64::G_DUP: { 689 Register ScalarReg = MI.getOperand(1).getReg(); 690 LLT ScalarTy = MRI.getType(ScalarReg); 691 auto ScalarDef = MRI.getVRegDef(ScalarReg); 692 // s8 is an exception for G_DUP, which we always want on gpr. 693 if (ScalarTy.getSizeInBits() != 8 && 694 (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank || 695 onlyDefinesFP(*ScalarDef, MRI, TRI))) 696 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 697 else 698 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; 699 break; 700 } 701 case TargetOpcode::G_TRUNC: { 702 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 703 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) 704 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 705 break; 706 } 707 case TargetOpcode::G_SITOFP: 708 case TargetOpcode::G_UITOFP: { 709 if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 710 break; 711 // Integer to FP conversions don't necessarily happen between GPR -> FPR 712 // regbanks. They can also be done within an FPR register. 713 Register SrcReg = MI.getOperand(1).getReg(); 714 if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) 715 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 716 else 717 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; 718 break; 719 } 720 case TargetOpcode::G_FPTOSI: 721 case TargetOpcode::G_FPTOUI: 722 if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 723 break; 724 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; 725 break; 726 case TargetOpcode::G_FCMP: { 727 // If the result is a vector, it must use a FPR. 728 AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 = 729 MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR 730 : PMI_FirstGPR; 731 OpRegBankIdx = {Idx0, 732 /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR}; 733 break; 734 } 735 case TargetOpcode::G_BITCAST: 736 // This is going to be a cross register bank copy and this is expensive. 737 if (OpRegBankIdx[0] != OpRegBankIdx[1]) 738 Cost = copyCost( 739 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank, 740 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank, 741 OpSize[0]); 742 break; 743 case TargetOpcode::G_LOAD: 744 // Loading in vector unit is slightly more expensive. 745 // This is actually only true for the LD1R and co instructions, 746 // but anyway for the fast mode this number does not matter and 747 // for the greedy mode the cost of the cross bank copy will 748 // offset this number. 749 // FIXME: Should be derived from the scheduling model. 750 if (OpRegBankIdx[0] != PMI_FirstGPR) 751 Cost = 2; 752 else 753 // Check if that load feeds fp instructions. 754 // In that case, we want the default mapping to be on FPR 755 // instead of blind map every scalar to GPR. 756 for (const MachineInstr &UseMI : 757 MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) { 758 // If we have at least one direct use in a FP instruction, 759 // assume this was a floating point load in the IR. 760 // If it was not, we would have had a bitcast before 761 // reaching that instruction. 762 // Int->FP conversion operations are also captured in onlyDefinesFP(). 763 if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { 764 OpRegBankIdx[0] = PMI_FirstFPR; 765 break; 766 } 767 } 768 break; 769 case TargetOpcode::G_STORE: 770 // Check if that store is fed by fp instructions. 771 if (OpRegBankIdx[0] == PMI_FirstGPR) { 772 Register VReg = MI.getOperand(0).getReg(); 773 if (!VReg) 774 break; 775 MachineInstr *DefMI = MRI.getVRegDef(VReg); 776 if (onlyDefinesFP(*DefMI, MRI, TRI)) 777 OpRegBankIdx[0] = PMI_FirstFPR; 778 break; 779 } 780 break; 781 case TargetOpcode::G_SELECT: { 782 // If the destination is FPR, preserve that. 783 if (OpRegBankIdx[0] != PMI_FirstGPR) 784 break; 785 786 // If we're taking in vectors, we have no choice but to put everything on 787 // FPRs, except for the condition. The condition must always be on a GPR. 788 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); 789 if (SrcTy.isVector()) { 790 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; 791 break; 792 } 793 794 // Try to minimize the number of copies. If we have more floating point 795 // constrained values than not, then we'll put everything on FPR. Otherwise, 796 // everything has to be on GPR. 797 unsigned NumFP = 0; 798 799 // Check if the uses of the result always produce floating point values. 800 // 801 // For example: 802 // 803 // %z = G_SELECT %cond %x %y 804 // fpr = G_FOO %z ... 805 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 806 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) 807 ++NumFP; 808 809 // Check if the defs of the source values always produce floating point 810 // values. 811 // 812 // For example: 813 // 814 // %x = G_SOMETHING_ALWAYS_FLOAT %a ... 815 // %z = G_SELECT %cond %x %y 816 // 817 // Also check whether or not the sources have already been decided to be 818 // FPR. Keep track of this. 819 // 820 // This doesn't check the condition, since it's just whatever is in NZCV. 821 // This isn't passed explicitly in a register to fcsel/csel. 822 for (unsigned Idx = 2; Idx < 4; ++Idx) { 823 Register VReg = MI.getOperand(Idx).getReg(); 824 MachineInstr *DefMI = MRI.getVRegDef(VReg); 825 if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank || 826 onlyDefinesFP(*DefMI, MRI, TRI)) 827 ++NumFP; 828 } 829 830 // If we have more FP constraints than not, then move everything over to 831 // FPR. 832 if (NumFP >= 2) 833 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; 834 835 break; 836 } 837 case TargetOpcode::G_UNMERGE_VALUES: { 838 // If the first operand belongs to a FPR register bank, then make sure that 839 // we preserve that. 840 if (OpRegBankIdx[0] != PMI_FirstGPR) 841 break; 842 843 LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg()); 844 // UNMERGE into scalars from a vector should always use FPR. 845 // Likewise if any of the uses are FP instructions. 846 if (SrcTy.isVector() || SrcTy == LLT::scalar(128) || 847 any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 848 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) { 849 // Set the register bank of every operand to FPR. 850 for (unsigned Idx = 0, NumOperands = MI.getNumOperands(); 851 Idx < NumOperands; ++Idx) 852 OpRegBankIdx[Idx] = PMI_FirstFPR; 853 } 854 break; 855 } 856 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 857 // Destination and source need to be FPRs. 858 OpRegBankIdx[0] = PMI_FirstFPR; 859 OpRegBankIdx[1] = PMI_FirstFPR; 860 861 // Index needs to be a GPR. 862 OpRegBankIdx[2] = PMI_FirstGPR; 863 break; 864 case TargetOpcode::G_INSERT_VECTOR_ELT: 865 OpRegBankIdx[0] = PMI_FirstFPR; 866 OpRegBankIdx[1] = PMI_FirstFPR; 867 868 // The element may be either a GPR or FPR. Preserve that behaviour. 869 if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank) 870 OpRegBankIdx[2] = PMI_FirstFPR; 871 else 872 OpRegBankIdx[2] = PMI_FirstGPR; 873 874 // Index needs to be a GPR. 875 OpRegBankIdx[3] = PMI_FirstGPR; 876 break; 877 case TargetOpcode::G_EXTRACT: { 878 // For s128 sources we have to use fpr unless we know otherwise. 879 auto Src = MI.getOperand(1).getReg(); 880 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 881 if (SrcTy.getSizeInBits() != 128) 882 break; 883 auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass 884 ? PMI_FirstGPR 885 : PMI_FirstFPR; 886 OpRegBankIdx[0] = Idx; 887 OpRegBankIdx[1] = Idx; 888 break; 889 } 890 case TargetOpcode::G_BUILD_VECTOR: { 891 // If the first source operand belongs to a FPR register bank, then make 892 // sure that we preserve that. 893 if (OpRegBankIdx[1] != PMI_FirstGPR) 894 break; 895 Register VReg = MI.getOperand(1).getReg(); 896 if (!VReg) 897 break; 898 899 // Get the instruction that defined the source operand reg, and check if 900 // it's a floating point operation. Or, if it's a type like s16 which 901 // doesn't have a exact size gpr register class. The exception is if the 902 // build_vector has all constant operands, which may be better to leave as 903 // gpr without copies, so it can be matched in imported patterns. 904 MachineInstr *DefMI = MRI.getVRegDef(VReg); 905 unsigned DefOpc = DefMI->getOpcode(); 906 const LLT SrcTy = MRI.getType(VReg); 907 if (all_of(MI.operands(), [&](const MachineOperand &Op) { 908 return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == 909 TargetOpcode::G_CONSTANT; 910 })) 911 break; 912 if (isPreISelGenericFloatingPointOpcode(DefOpc) || 913 SrcTy.getSizeInBits() < 32 || 914 getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) { 915 // Have a floating point op. 916 // Make sure every operand gets mapped to a FPR register class. 917 unsigned NumOperands = MI.getNumOperands(); 918 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) 919 OpRegBankIdx[Idx] = PMI_FirstFPR; 920 } 921 break; 922 } 923 case TargetOpcode::G_VECREDUCE_FADD: 924 case TargetOpcode::G_VECREDUCE_FMUL: 925 case TargetOpcode::G_VECREDUCE_FMAX: 926 case TargetOpcode::G_VECREDUCE_FMIN: 927 case TargetOpcode::G_VECREDUCE_ADD: 928 case TargetOpcode::G_VECREDUCE_MUL: 929 case TargetOpcode::G_VECREDUCE_AND: 930 case TargetOpcode::G_VECREDUCE_OR: 931 case TargetOpcode::G_VECREDUCE_XOR: 932 case TargetOpcode::G_VECREDUCE_SMAX: 933 case TargetOpcode::G_VECREDUCE_SMIN: 934 case TargetOpcode::G_VECREDUCE_UMAX: 935 case TargetOpcode::G_VECREDUCE_UMIN: 936 // Reductions produce a scalar value from a vector, the scalar should be on 937 // FPR bank. 938 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 939 break; 940 case TargetOpcode::G_VECREDUCE_SEQ_FADD: 941 case TargetOpcode::G_VECREDUCE_SEQ_FMUL: 942 // These reductions also take a scalar accumulator input. 943 // Assign them FPR for now. 944 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; 945 break; 946 case TargetOpcode::G_INTRINSIC: { 947 // Check if we know that the intrinsic has any constraints on its register 948 // banks. If it does, then update the mapping accordingly. 949 unsigned ID = MI.getIntrinsicID(); 950 unsigned Idx = 0; 951 if (!isFPIntrinsic(ID)) 952 break; 953 for (const auto &Op : MI.explicit_operands()) { 954 if (Op.isReg()) 955 OpRegBankIdx[Idx] = PMI_FirstFPR; 956 ++Idx; 957 } 958 break; 959 } 960 } 961 962 // Finally construct the computed mapping. 963 SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); 964 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { 965 if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) { 966 auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]); 967 if (!Mapping->isValid()) 968 return getInvalidInstructionMapping(); 969 970 OpdsMapping[Idx] = Mapping; 971 } 972 } 973 974 return getInstructionMapping(DefaultMappingID, Cost, 975 getOperandsMapping(OpdsMapping), NumOperands); 976 } 977