1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the RegisterBankInfo class for 10 /// AArch64. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64RegisterBankInfo.h" 15 #include "AArch64RegisterInfo.h" 16 #include "MCTargetDesc/AArch64MCTargetDesc.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 20 #include "llvm/CodeGen/GlobalISel/Utils.h" 21 #include "llvm/CodeGen/LowLevelType.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstr.h" 24 #include "llvm/CodeGen/MachineOperand.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/RegisterBank.h" 27 #include "llvm/CodeGen/RegisterBankInfo.h" 28 #include "llvm/CodeGen/TargetOpcodes.h" 29 #include "llvm/CodeGen/TargetRegisterInfo.h" 30 #include "llvm/CodeGen/TargetSubtargetInfo.h" 31 #include "llvm/IR/IntrinsicsAArch64.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include <algorithm> 34 #include <cassert> 35 36 #define GET_TARGET_REGBANK_IMPL 37 #include "AArch64GenRegisterBank.inc" 38 39 // This file will be TableGen'ed at some point. 40 #include "AArch64GenRegisterBankInfo.def" 41 42 using namespace llvm; 43 44 AArch64RegisterBankInfo::AArch64RegisterBankInfo( 45 const TargetRegisterInfo &TRI) { 46 static llvm::once_flag InitializeRegisterBankFlag; 47 48 static auto InitializeRegisterBankOnce = [&]() { 49 // We have only one set of register banks, whatever the subtarget 50 // is. Therefore, the initialization of the RegBanks table should be 51 // done only once. Indeed the table of all register banks 52 // (AArch64::RegBanks) is unique in the compiler. At some point, it 53 // will get tablegen'ed and the whole constructor becomes empty. 54 55 const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID); 56 (void)RBGPR; 57 assert(&AArch64::GPRRegBank == &RBGPR && 58 "The order in RegBanks is messed up"); 59 60 const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID); 61 (void)RBFPR; 62 assert(&AArch64::FPRRegBank == &RBFPR && 63 "The order in RegBanks is messed up"); 64 65 const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID); 66 (void)RBCCR; 67 assert(&AArch64::CCRegBank == &RBCCR && 68 "The order in RegBanks is messed up"); 69 70 // The GPR register bank is fully defined by all the registers in 71 // GR64all + its subclasses. 72 assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) && 73 "Subclass not added?"); 74 assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit"); 75 76 // The FPR register bank is fully defined by all the registers in 77 // GR64all + its subclasses. 78 assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) && 79 "Subclass not added?"); 80 assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) && 81 "Subclass not added?"); 82 assert(RBFPR.getSize() == 512 && 83 "FPRs should hold up to 512-bit via QQQQ sequence"); 84 85 assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) && 86 "Class not added?"); 87 assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit"); 88 89 // Check that the TableGen'ed like file is in sync we our expectations. 90 // First, the Idx. 91 assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR, 92 {PMI_GPR32, PMI_GPR64, PMI_GPR128}) && 93 "PartialMappingIdx's are incorrectly ordered"); 94 assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR, 95 {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128, 96 PMI_FPR256, PMI_FPR512}) && 97 "PartialMappingIdx's are incorrectly ordered"); 98 // Now, the content. 99 // Check partial mapping. 100 #define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \ 101 do { \ 102 assert( \ 103 checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \ 104 #Idx " is incorrectly initialized"); \ 105 } while (false) 106 107 CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR); 108 CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR); 109 CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR); 110 CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR); 111 CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR); 112 CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR); 113 CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR); 114 CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR); 115 CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR); 116 117 // Check value mapping. 118 #define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \ 119 do { \ 120 assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \ 121 PartialMappingIdx::PMI_First##RBName, Size, \ 122 Offset) && \ 123 #RBName #Size " " #Offset " is incorrectly initialized"); \ 124 } while (false) 125 126 #define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0) 127 128 CHECK_VALUEMAP(GPR, 32); 129 CHECK_VALUEMAP(GPR, 64); 130 CHECK_VALUEMAP(GPR, 128); 131 CHECK_VALUEMAP(FPR, 16); 132 CHECK_VALUEMAP(FPR, 32); 133 CHECK_VALUEMAP(FPR, 64); 134 CHECK_VALUEMAP(FPR, 128); 135 CHECK_VALUEMAP(FPR, 256); 136 CHECK_VALUEMAP(FPR, 512); 137 138 // Check the value mapping for 3-operands instructions where all the operands 139 // map to the same value mapping. 140 #define CHECK_VALUEMAP_3OPS(RBName, Size) \ 141 do { \ 142 CHECK_VALUEMAP_IMPL(RBName, Size, 0); \ 143 CHECK_VALUEMAP_IMPL(RBName, Size, 1); \ 144 CHECK_VALUEMAP_IMPL(RBName, Size, 2); \ 145 } while (false) 146 147 CHECK_VALUEMAP_3OPS(GPR, 32); 148 CHECK_VALUEMAP_3OPS(GPR, 64); 149 CHECK_VALUEMAP_3OPS(GPR, 128); 150 CHECK_VALUEMAP_3OPS(FPR, 32); 151 CHECK_VALUEMAP_3OPS(FPR, 64); 152 CHECK_VALUEMAP_3OPS(FPR, 128); 153 CHECK_VALUEMAP_3OPS(FPR, 256); 154 CHECK_VALUEMAP_3OPS(FPR, 512); 155 156 #define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \ 157 do { \ 158 unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \ 159 unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \ 160 (void)PartialMapDstIdx; \ 161 (void)PartialMapSrcIdx; \ 162 const ValueMapping *Map = getCopyMapping( \ 163 AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \ 164 (void)Map; \ 165 assert(Map[0].BreakDown == \ 166 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ 167 Map[0].NumBreakDowns == 1 && #RBNameDst #Size \ 168 " Dst is incorrectly initialized"); \ 169 assert(Map[1].BreakDown == \ 170 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ 171 Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \ 172 " Src is incorrectly initialized"); \ 173 \ 174 } while (false) 175 176 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32); 177 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32); 178 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64); 179 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64); 180 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32); 181 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32); 182 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64); 183 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64); 184 185 #define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \ 186 do { \ 187 unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \ 188 unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \ 189 (void)PartialMapDstIdx; \ 190 (void)PartialMapSrcIdx; \ 191 const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \ 192 (void)Map; \ 193 assert(Map[0].BreakDown == \ 194 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ 195 Map[0].NumBreakDowns == 1 && "FPR" #DstSize \ 196 " Dst is incorrectly initialized"); \ 197 assert(Map[1].BreakDown == \ 198 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ 199 Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \ 200 " Src is incorrectly initialized"); \ 201 \ 202 } while (false) 203 204 CHECK_VALUEMAP_FPEXT(32, 16); 205 CHECK_VALUEMAP_FPEXT(64, 16); 206 CHECK_VALUEMAP_FPEXT(64, 32); 207 CHECK_VALUEMAP_FPEXT(128, 64); 208 209 assert(verify(TRI) && "Invalid register bank information"); 210 }; 211 212 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce); 213 } 214 215 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A, 216 const RegisterBank &B, 217 unsigned Size) const { 218 // What do we do with different size? 219 // copy are same size. 220 // Will introduce other hooks for different size: 221 // * extract cost. 222 // * build_sequence cost. 223 224 // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV. 225 // FIXME: This should be deduced from the scheduling model. 226 if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank) 227 // FMOVXDr or FMOVWSr. 228 return 5; 229 if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank) 230 // FMOVDXr or FMOVSWr. 231 return 4; 232 233 return RegisterBankInfo::copyCost(A, B, Size); 234 } 235 236 const RegisterBank & 237 AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, 238 LLT) const { 239 switch (RC.getID()) { 240 case AArch64::FPR8RegClassID: 241 case AArch64::FPR16RegClassID: 242 case AArch64::FPR16_loRegClassID: 243 case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID: 244 case AArch64::FPR32RegClassID: 245 case AArch64::FPR64RegClassID: 246 case AArch64::FPR64_loRegClassID: 247 case AArch64::FPR128RegClassID: 248 case AArch64::FPR128_loRegClassID: 249 case AArch64::DDRegClassID: 250 case AArch64::DDDRegClassID: 251 case AArch64::DDDDRegClassID: 252 case AArch64::QQRegClassID: 253 case AArch64::QQQRegClassID: 254 case AArch64::QQQQRegClassID: 255 return getRegBank(AArch64::FPRRegBankID); 256 case AArch64::GPR32commonRegClassID: 257 case AArch64::GPR32RegClassID: 258 case AArch64::GPR32spRegClassID: 259 case AArch64::GPR32sponlyRegClassID: 260 case AArch64::GPR32argRegClassID: 261 case AArch64::GPR32allRegClassID: 262 case AArch64::GPR64commonRegClassID: 263 case AArch64::GPR64RegClassID: 264 case AArch64::GPR64spRegClassID: 265 case AArch64::GPR64sponlyRegClassID: 266 case AArch64::GPR64argRegClassID: 267 case AArch64::GPR64allRegClassID: 268 case AArch64::GPR64noipRegClassID: 269 case AArch64::GPR64common_and_GPR64noipRegClassID: 270 case AArch64::GPR64noip_and_tcGPR64RegClassID: 271 case AArch64::tcGPR64RegClassID: 272 case AArch64::rtcGPR64RegClassID: 273 case AArch64::WSeqPairsClassRegClassID: 274 case AArch64::XSeqPairsClassRegClassID: 275 case AArch64::MatrixIndexGPR32_8_11RegClassID: 276 case AArch64::MatrixIndexGPR32_12_15RegClassID: 277 case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID: 278 case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID: 279 return getRegBank(AArch64::GPRRegBankID); 280 case AArch64::CCRRegClassID: 281 return getRegBank(AArch64::CCRegBankID); 282 default: 283 llvm_unreachable("Register class not supported"); 284 } 285 } 286 287 RegisterBankInfo::InstructionMappings 288 AArch64RegisterBankInfo::getInstrAlternativeMappings( 289 const MachineInstr &MI) const { 290 const MachineFunction &MF = *MI.getParent()->getParent(); 291 const TargetSubtargetInfo &STI = MF.getSubtarget(); 292 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 293 const MachineRegisterInfo &MRI = MF.getRegInfo(); 294 295 switch (MI.getOpcode()) { 296 case TargetOpcode::G_OR: { 297 // 32 and 64-bit or can be mapped on either FPR or 298 // GPR for the same cost. 299 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 300 if (Size != 32 && Size != 64) 301 break; 302 303 // If the instruction has any implicit-defs or uses, 304 // do not mess with it. 305 if (MI.getNumOperands() != 3) 306 break; 307 InstructionMappings AltMappings; 308 const InstructionMapping &GPRMapping = getInstructionMapping( 309 /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size), 310 /*NumOperands*/ 3); 311 const InstructionMapping &FPRMapping = getInstructionMapping( 312 /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size), 313 /*NumOperands*/ 3); 314 315 AltMappings.push_back(&GPRMapping); 316 AltMappings.push_back(&FPRMapping); 317 return AltMappings; 318 } 319 case TargetOpcode::G_BITCAST: { 320 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 321 if (Size != 32 && Size != 64) 322 break; 323 324 // If the instruction has any implicit-defs or uses, 325 // do not mess with it. 326 if (MI.getNumOperands() != 2) 327 break; 328 329 InstructionMappings AltMappings; 330 const InstructionMapping &GPRMapping = getInstructionMapping( 331 /*ID*/ 1, /*Cost*/ 1, 332 getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size), 333 /*NumOperands*/ 2); 334 const InstructionMapping &FPRMapping = getInstructionMapping( 335 /*ID*/ 2, /*Cost*/ 1, 336 getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size), 337 /*NumOperands*/ 2); 338 const InstructionMapping &GPRToFPRMapping = getInstructionMapping( 339 /*ID*/ 3, 340 /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), 341 getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size), 342 /*NumOperands*/ 2); 343 const InstructionMapping &FPRToGPRMapping = getInstructionMapping( 344 /*ID*/ 3, 345 /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), 346 getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size), 347 /*NumOperands*/ 2); 348 349 AltMappings.push_back(&GPRMapping); 350 AltMappings.push_back(&FPRMapping); 351 AltMappings.push_back(&GPRToFPRMapping); 352 AltMappings.push_back(&FPRToGPRMapping); 353 return AltMappings; 354 } 355 case TargetOpcode::G_LOAD: { 356 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 357 if (Size != 64) 358 break; 359 360 // If the instruction has any implicit-defs or uses, 361 // do not mess with it. 362 if (MI.getNumOperands() != 2) 363 break; 364 365 InstructionMappings AltMappings; 366 const InstructionMapping &GPRMapping = getInstructionMapping( 367 /*ID*/ 1, /*Cost*/ 1, 368 getOperandsMapping({getValueMapping(PMI_FirstGPR, Size), 369 // Addresses are GPR 64-bit. 370 getValueMapping(PMI_FirstGPR, 64)}), 371 /*NumOperands*/ 2); 372 const InstructionMapping &FPRMapping = getInstructionMapping( 373 /*ID*/ 2, /*Cost*/ 1, 374 getOperandsMapping({getValueMapping(PMI_FirstFPR, Size), 375 // Addresses are GPR 64-bit. 376 getValueMapping(PMI_FirstGPR, 64)}), 377 /*NumOperands*/ 2); 378 379 AltMappings.push_back(&GPRMapping); 380 AltMappings.push_back(&FPRMapping); 381 return AltMappings; 382 } 383 default: 384 break; 385 } 386 return RegisterBankInfo::getInstrAlternativeMappings(MI); 387 } 388 389 void AArch64RegisterBankInfo::applyMappingImpl( 390 const OperandsMapper &OpdMapper) const { 391 switch (OpdMapper.getMI().getOpcode()) { 392 case TargetOpcode::G_OR: 393 case TargetOpcode::G_BITCAST: 394 case TargetOpcode::G_LOAD: 395 // Those ID must match getInstrAlternativeMappings. 396 assert((OpdMapper.getInstrMapping().getID() >= 1 && 397 OpdMapper.getInstrMapping().getID() <= 4) && 398 "Don't know how to handle that ID"); 399 return applyDefaultMapping(OpdMapper); 400 default: 401 llvm_unreachable("Don't know how to handle that operation"); 402 } 403 } 404 405 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, 406 /// having only floating-point operands. 407 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { 408 switch (Opc) { 409 case TargetOpcode::G_FADD: 410 case TargetOpcode::G_FSUB: 411 case TargetOpcode::G_FMUL: 412 case TargetOpcode::G_FMA: 413 case TargetOpcode::G_FDIV: 414 case TargetOpcode::G_FCONSTANT: 415 case TargetOpcode::G_FPEXT: 416 case TargetOpcode::G_FPTRUNC: 417 case TargetOpcode::G_FCEIL: 418 case TargetOpcode::G_FFLOOR: 419 case TargetOpcode::G_FNEARBYINT: 420 case TargetOpcode::G_FNEG: 421 case TargetOpcode::G_FCOS: 422 case TargetOpcode::G_FSIN: 423 case TargetOpcode::G_FLOG10: 424 case TargetOpcode::G_FLOG: 425 case TargetOpcode::G_FLOG2: 426 case TargetOpcode::G_FSQRT: 427 case TargetOpcode::G_FABS: 428 case TargetOpcode::G_FEXP: 429 case TargetOpcode::G_FRINT: 430 case TargetOpcode::G_INTRINSIC_TRUNC: 431 case TargetOpcode::G_INTRINSIC_ROUND: 432 case TargetOpcode::G_FMAXNUM: 433 case TargetOpcode::G_FMINNUM: 434 case TargetOpcode::G_FMAXIMUM: 435 case TargetOpcode::G_FMINIMUM: 436 return true; 437 } 438 return false; 439 } 440 441 const RegisterBankInfo::InstructionMapping & 442 AArch64RegisterBankInfo::getSameKindOfOperandsMapping( 443 const MachineInstr &MI) const { 444 const unsigned Opc = MI.getOpcode(); 445 const MachineFunction &MF = *MI.getParent()->getParent(); 446 const MachineRegisterInfo &MRI = MF.getRegInfo(); 447 448 unsigned NumOperands = MI.getNumOperands(); 449 assert(NumOperands <= 3 && 450 "This code is for instructions with 3 or less operands"); 451 452 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 453 unsigned Size = Ty.getSizeInBits(); 454 bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc); 455 456 PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR; 457 458 #ifndef NDEBUG 459 // Make sure all the operands are using similar size and type. 460 // Should probably be checked by the machine verifier. 461 // This code won't catch cases where the number of lanes is 462 // different between the operands. 463 // If we want to go to that level of details, it is probably 464 // best to check that the types are the same, period. 465 // Currently, we just check that the register banks are the same 466 // for each types. 467 for (unsigned Idx = 1; Idx != NumOperands; ++Idx) { 468 LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg()); 469 assert( 470 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset( 471 RBIdx, OpTy.getSizeInBits()) == 472 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) && 473 "Operand has incompatible size"); 474 bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc); 475 (void)OpIsFPR; 476 assert(IsFPR == OpIsFPR && "Operand has incompatible type"); 477 } 478 #endif // End NDEBUG. 479 480 return getInstructionMapping(DefaultMappingID, 1, 481 getValueMapping(RBIdx, Size), NumOperands); 482 } 483 484 /// \returns true if a given intrinsic \p ID only uses and defines FPRs. 485 static bool isFPIntrinsic(unsigned ID) { 486 // TODO: Add more intrinsics. 487 switch (ID) { 488 default: 489 return false; 490 case Intrinsic::aarch64_neon_uaddlv: 491 return true; 492 } 493 } 494 495 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, 496 const MachineRegisterInfo &MRI, 497 const TargetRegisterInfo &TRI, 498 unsigned Depth) const { 499 unsigned Op = MI.getOpcode(); 500 if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID())) 501 return true; 502 503 // Do we have an explicit floating point instruction? 504 if (isPreISelGenericFloatingPointOpcode(Op)) 505 return true; 506 507 // No. Check if we have a copy-like instruction. If we do, then we could 508 // still be fed by floating point instructions. 509 if (Op != TargetOpcode::COPY && !MI.isPHI() && 510 !isPreISelGenericOptimizationHint(Op)) 511 return false; 512 513 // Check if we already know the register bank. 514 auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); 515 if (RB == &AArch64::FPRRegBank) 516 return true; 517 if (RB == &AArch64::GPRRegBank) 518 return false; 519 520 // We don't know anything. 521 // 522 // If we have a phi, we may be able to infer that it will be assigned a FPR 523 // based off of its inputs. 524 if (!MI.isPHI() || Depth > MaxFPRSearchDepth) 525 return false; 526 527 return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { 528 return Op.isReg() && 529 onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); 530 }); 531 } 532 533 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, 534 const MachineRegisterInfo &MRI, 535 const TargetRegisterInfo &TRI, 536 unsigned Depth) const { 537 switch (MI.getOpcode()) { 538 case TargetOpcode::G_FPTOSI: 539 case TargetOpcode::G_FPTOUI: 540 case TargetOpcode::G_FCMP: 541 case TargetOpcode::G_LROUND: 542 case TargetOpcode::G_LLROUND: 543 return true; 544 default: 545 break; 546 } 547 return hasFPConstraints(MI, MRI, TRI, Depth); 548 } 549 550 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, 551 const MachineRegisterInfo &MRI, 552 const TargetRegisterInfo &TRI, 553 unsigned Depth) const { 554 switch (MI.getOpcode()) { 555 case AArch64::G_DUP: 556 case TargetOpcode::G_SITOFP: 557 case TargetOpcode::G_UITOFP: 558 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 559 case TargetOpcode::G_INSERT_VECTOR_ELT: 560 case TargetOpcode::G_BUILD_VECTOR: 561 case TargetOpcode::G_BUILD_VECTOR_TRUNC: 562 return true; 563 default: 564 break; 565 } 566 return hasFPConstraints(MI, MRI, TRI, Depth); 567 } 568 569 const RegisterBankInfo::InstructionMapping & 570 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { 571 const unsigned Opc = MI.getOpcode(); 572 573 // Try the default logic for non-generic instructions that are either copies 574 // or already have some operands assigned to banks. 575 if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) || 576 Opc == TargetOpcode::G_PHI) { 577 const RegisterBankInfo::InstructionMapping &Mapping = 578 getInstrMappingImpl(MI); 579 if (Mapping.isValid()) 580 return Mapping; 581 } 582 583 const MachineFunction &MF = *MI.getParent()->getParent(); 584 const MachineRegisterInfo &MRI = MF.getRegInfo(); 585 const TargetSubtargetInfo &STI = MF.getSubtarget(); 586 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 587 588 switch (Opc) { 589 // G_{F|S|U}REM are not listed because they are not legal. 590 // Arithmetic ops. 591 case TargetOpcode::G_ADD: 592 case TargetOpcode::G_SUB: 593 case TargetOpcode::G_PTR_ADD: 594 case TargetOpcode::G_MUL: 595 case TargetOpcode::G_SDIV: 596 case TargetOpcode::G_UDIV: 597 // Bitwise ops. 598 case TargetOpcode::G_AND: 599 case TargetOpcode::G_OR: 600 case TargetOpcode::G_XOR: 601 // Floating point ops. 602 case TargetOpcode::G_FADD: 603 case TargetOpcode::G_FSUB: 604 case TargetOpcode::G_FMUL: 605 case TargetOpcode::G_FDIV: 606 case TargetOpcode::G_FMAXIMUM: 607 case TargetOpcode::G_FMINIMUM: 608 return getSameKindOfOperandsMapping(MI); 609 case TargetOpcode::G_FPEXT: { 610 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 611 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 612 return getInstructionMapping( 613 DefaultMappingID, /*Cost*/ 1, 614 getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()), 615 /*NumOperands*/ 2); 616 } 617 // Shifts. 618 case TargetOpcode::G_SHL: 619 case TargetOpcode::G_LSHR: 620 case TargetOpcode::G_ASHR: { 621 LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg()); 622 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 623 if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32) 624 return getInstructionMapping(DefaultMappingID, 1, 625 &ValMappings[Shift64Imm], 3); 626 return getSameKindOfOperandsMapping(MI); 627 } 628 case TargetOpcode::COPY: { 629 Register DstReg = MI.getOperand(0).getReg(); 630 Register SrcReg = MI.getOperand(1).getReg(); 631 // Check if one of the register is not a generic register. 632 if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) || 633 (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) { 634 const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI); 635 const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI); 636 if (!DstRB) 637 DstRB = SrcRB; 638 else if (!SrcRB) 639 SrcRB = DstRB; 640 // If both RB are null that means both registers are generic. 641 // We shouldn't be here. 642 assert(DstRB && SrcRB && "Both RegBank were nullptr"); 643 unsigned Size = getSizeInBits(DstReg, MRI, TRI); 644 return getInstructionMapping( 645 DefaultMappingID, copyCost(*DstRB, *SrcRB, Size), 646 getCopyMapping(DstRB->getID(), SrcRB->getID(), Size), 647 // We only care about the mapping of the destination. 648 /*NumOperands*/ 1); 649 } 650 // Both registers are generic, use G_BITCAST. 651 [[fallthrough]]; 652 } 653 case TargetOpcode::G_BITCAST: { 654 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 655 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 656 unsigned Size = DstTy.getSizeInBits(); 657 bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64; 658 bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64; 659 const RegisterBank &DstRB = 660 DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; 661 const RegisterBank &SrcRB = 662 SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; 663 return getInstructionMapping( 664 DefaultMappingID, copyCost(DstRB, SrcRB, Size), 665 getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), 666 // We only care about the mapping of the destination for COPY. 667 /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1); 668 } 669 default: 670 break; 671 } 672 673 unsigned NumOperands = MI.getNumOperands(); 674 675 // Track the size and bank of each register. We don't do partial mappings. 676 SmallVector<unsigned, 4> OpSize(NumOperands); 677 SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands); 678 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { 679 auto &MO = MI.getOperand(Idx); 680 if (!MO.isReg() || !MO.getReg()) 681 continue; 682 683 LLT Ty = MRI.getType(MO.getReg()); 684 if (!Ty.isValid()) 685 continue; 686 OpSize[Idx] = Ty.getSizeInBits(); 687 688 // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs. 689 // For floating-point instructions, scalars go in FPRs. 690 if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) || 691 Ty.getSizeInBits() > 64) 692 OpRegBankIdx[Idx] = PMI_FirstFPR; 693 else 694 OpRegBankIdx[Idx] = PMI_FirstGPR; 695 } 696 697 unsigned Cost = 1; 698 // Some of the floating-point instructions have mixed GPR and FPR operands: 699 // fine-tune the computed mapping. 700 switch (Opc) { 701 case AArch64::G_DUP: { 702 Register ScalarReg = MI.getOperand(1).getReg(); 703 LLT ScalarTy = MRI.getType(ScalarReg); 704 auto ScalarDef = MRI.getVRegDef(ScalarReg); 705 // s8 is an exception for G_DUP, which we always want on gpr. 706 if (ScalarTy.getSizeInBits() != 8 && 707 (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank || 708 onlyDefinesFP(*ScalarDef, MRI, TRI))) 709 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 710 else 711 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; 712 break; 713 } 714 case TargetOpcode::G_TRUNC: { 715 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 716 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) 717 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 718 break; 719 } 720 case TargetOpcode::G_SITOFP: 721 case TargetOpcode::G_UITOFP: { 722 if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 723 break; 724 // Integer to FP conversions don't necessarily happen between GPR -> FPR 725 // regbanks. They can also be done within an FPR register. 726 Register SrcReg = MI.getOperand(1).getReg(); 727 if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) 728 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 729 else 730 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; 731 break; 732 } 733 case TargetOpcode::G_FPTOSI: 734 case TargetOpcode::G_FPTOUI: 735 if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 736 break; 737 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; 738 break; 739 case TargetOpcode::G_FCMP: { 740 // If the result is a vector, it must use a FPR. 741 AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 = 742 MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR 743 : PMI_FirstGPR; 744 OpRegBankIdx = {Idx0, 745 /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR}; 746 break; 747 } 748 case TargetOpcode::G_BITCAST: 749 // This is going to be a cross register bank copy and this is expensive. 750 if (OpRegBankIdx[0] != OpRegBankIdx[1]) 751 Cost = copyCost( 752 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank, 753 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank, 754 OpSize[0]); 755 break; 756 case TargetOpcode::G_LOAD: 757 // Loading in vector unit is slightly more expensive. 758 // This is actually only true for the LD1R and co instructions, 759 // but anyway for the fast mode this number does not matter and 760 // for the greedy mode the cost of the cross bank copy will 761 // offset this number. 762 // FIXME: Should be derived from the scheduling model. 763 if (OpRegBankIdx[0] != PMI_FirstGPR) { 764 Cost = 2; 765 break; 766 } 767 768 if (cast<GLoad>(MI).isAtomic()) { 769 // Atomics always use GPR destinations. Don't refine any further. 770 OpRegBankIdx[0] = PMI_FirstGPR; 771 break; 772 } 773 774 // Check if that load feeds fp instructions. 775 // In that case, we want the default mapping to be on FPR 776 // instead of blind map every scalar to GPR. 777 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 778 [&](const MachineInstr &UseMI) { 779 // If we have at least one direct use in a FP instruction, 780 // assume this was a floating point load in the IR. If it was 781 // not, we would have had a bitcast before reaching that 782 // instruction. 783 // 784 // Int->FP conversion operations are also captured in 785 // onlyDefinesFP(). 786 return onlyUsesFP(UseMI, MRI, TRI) || 787 onlyDefinesFP(UseMI, MRI, TRI); 788 })) 789 OpRegBankIdx[0] = PMI_FirstFPR; 790 break; 791 case TargetOpcode::G_STORE: 792 // Check if that store is fed by fp instructions. 793 if (OpRegBankIdx[0] == PMI_FirstGPR) { 794 Register VReg = MI.getOperand(0).getReg(); 795 if (!VReg) 796 break; 797 MachineInstr *DefMI = MRI.getVRegDef(VReg); 798 if (onlyDefinesFP(*DefMI, MRI, TRI)) 799 OpRegBankIdx[0] = PMI_FirstFPR; 800 break; 801 } 802 break; 803 case TargetOpcode::G_SELECT: { 804 // If the destination is FPR, preserve that. 805 if (OpRegBankIdx[0] != PMI_FirstGPR) 806 break; 807 808 // If we're taking in vectors, we have no choice but to put everything on 809 // FPRs, except for the condition. The condition must always be on a GPR. 810 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); 811 if (SrcTy.isVector()) { 812 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; 813 break; 814 } 815 816 // Try to minimize the number of copies. If we have more floating point 817 // constrained values than not, then we'll put everything on FPR. Otherwise, 818 // everything has to be on GPR. 819 unsigned NumFP = 0; 820 821 // Check if the uses of the result always produce floating point values. 822 // 823 // For example: 824 // 825 // %z = G_SELECT %cond %x %y 826 // fpr = G_FOO %z ... 827 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 828 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) 829 ++NumFP; 830 831 // Check if the defs of the source values always produce floating point 832 // values. 833 // 834 // For example: 835 // 836 // %x = G_SOMETHING_ALWAYS_FLOAT %a ... 837 // %z = G_SELECT %cond %x %y 838 // 839 // Also check whether or not the sources have already been decided to be 840 // FPR. Keep track of this. 841 // 842 // This doesn't check the condition, since it's just whatever is in NZCV. 843 // This isn't passed explicitly in a register to fcsel/csel. 844 for (unsigned Idx = 2; Idx < 4; ++Idx) { 845 Register VReg = MI.getOperand(Idx).getReg(); 846 MachineInstr *DefMI = MRI.getVRegDef(VReg); 847 if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank || 848 onlyDefinesFP(*DefMI, MRI, TRI)) 849 ++NumFP; 850 } 851 852 // If we have more FP constraints than not, then move everything over to 853 // FPR. 854 if (NumFP >= 2) 855 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; 856 857 break; 858 } 859 case TargetOpcode::G_UNMERGE_VALUES: { 860 // If the first operand belongs to a FPR register bank, then make sure that 861 // we preserve that. 862 if (OpRegBankIdx[0] != PMI_FirstGPR) 863 break; 864 865 LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg()); 866 // UNMERGE into scalars from a vector should always use FPR. 867 // Likewise if any of the uses are FP instructions. 868 if (SrcTy.isVector() || SrcTy == LLT::scalar(128) || 869 any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 870 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) { 871 // Set the register bank of every operand to FPR. 872 for (unsigned Idx = 0, NumOperands = MI.getNumOperands(); 873 Idx < NumOperands; ++Idx) 874 OpRegBankIdx[Idx] = PMI_FirstFPR; 875 } 876 break; 877 } 878 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 879 // Destination and source need to be FPRs. 880 OpRegBankIdx[0] = PMI_FirstFPR; 881 OpRegBankIdx[1] = PMI_FirstFPR; 882 883 // Index needs to be a GPR. 884 OpRegBankIdx[2] = PMI_FirstGPR; 885 break; 886 case TargetOpcode::G_INSERT_VECTOR_ELT: 887 OpRegBankIdx[0] = PMI_FirstFPR; 888 OpRegBankIdx[1] = PMI_FirstFPR; 889 890 // The element may be either a GPR or FPR. Preserve that behaviour. 891 if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank) 892 OpRegBankIdx[2] = PMI_FirstFPR; 893 else 894 OpRegBankIdx[2] = PMI_FirstGPR; 895 896 // Index needs to be a GPR. 897 OpRegBankIdx[3] = PMI_FirstGPR; 898 break; 899 case TargetOpcode::G_EXTRACT: { 900 // For s128 sources we have to use fpr unless we know otherwise. 901 auto Src = MI.getOperand(1).getReg(); 902 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 903 if (SrcTy.getSizeInBits() != 128) 904 break; 905 auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass 906 ? PMI_FirstGPR 907 : PMI_FirstFPR; 908 OpRegBankIdx[0] = Idx; 909 OpRegBankIdx[1] = Idx; 910 break; 911 } 912 case TargetOpcode::G_BUILD_VECTOR: { 913 // If the first source operand belongs to a FPR register bank, then make 914 // sure that we preserve that. 915 if (OpRegBankIdx[1] != PMI_FirstGPR) 916 break; 917 Register VReg = MI.getOperand(1).getReg(); 918 if (!VReg) 919 break; 920 921 // Get the instruction that defined the source operand reg, and check if 922 // it's a floating point operation. Or, if it's a type like s16 which 923 // doesn't have a exact size gpr register class. The exception is if the 924 // build_vector has all constant operands, which may be better to leave as 925 // gpr without copies, so it can be matched in imported patterns. 926 MachineInstr *DefMI = MRI.getVRegDef(VReg); 927 unsigned DefOpc = DefMI->getOpcode(); 928 const LLT SrcTy = MRI.getType(VReg); 929 if (all_of(MI.operands(), [&](const MachineOperand &Op) { 930 return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == 931 TargetOpcode::G_CONSTANT; 932 })) 933 break; 934 if (isPreISelGenericFloatingPointOpcode(DefOpc) || 935 SrcTy.getSizeInBits() < 32 || 936 getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) { 937 // Have a floating point op. 938 // Make sure every operand gets mapped to a FPR register class. 939 unsigned NumOperands = MI.getNumOperands(); 940 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) 941 OpRegBankIdx[Idx] = PMI_FirstFPR; 942 } 943 break; 944 } 945 case TargetOpcode::G_VECREDUCE_FADD: 946 case TargetOpcode::G_VECREDUCE_FMUL: 947 case TargetOpcode::G_VECREDUCE_FMAX: 948 case TargetOpcode::G_VECREDUCE_FMIN: 949 case TargetOpcode::G_VECREDUCE_ADD: 950 case TargetOpcode::G_VECREDUCE_MUL: 951 case TargetOpcode::G_VECREDUCE_AND: 952 case TargetOpcode::G_VECREDUCE_OR: 953 case TargetOpcode::G_VECREDUCE_XOR: 954 case TargetOpcode::G_VECREDUCE_SMAX: 955 case TargetOpcode::G_VECREDUCE_SMIN: 956 case TargetOpcode::G_VECREDUCE_UMAX: 957 case TargetOpcode::G_VECREDUCE_UMIN: 958 // Reductions produce a scalar value from a vector, the scalar should be on 959 // FPR bank. 960 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 961 break; 962 case TargetOpcode::G_VECREDUCE_SEQ_FADD: 963 case TargetOpcode::G_VECREDUCE_SEQ_FMUL: 964 // These reductions also take a scalar accumulator input. 965 // Assign them FPR for now. 966 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; 967 break; 968 case TargetOpcode::G_INTRINSIC: { 969 // Check if we know that the intrinsic has any constraints on its register 970 // banks. If it does, then update the mapping accordingly. 971 unsigned ID = MI.getIntrinsicID(); 972 unsigned Idx = 0; 973 if (!isFPIntrinsic(ID)) 974 break; 975 for (const auto &Op : MI.explicit_operands()) { 976 if (Op.isReg()) 977 OpRegBankIdx[Idx] = PMI_FirstFPR; 978 ++Idx; 979 } 980 break; 981 } 982 case TargetOpcode::G_LROUND: 983 case TargetOpcode::G_LLROUND: { 984 // Source is always floating point and destination is always integer. 985 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; 986 break; 987 } 988 } 989 990 // Finally construct the computed mapping. 991 SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); 992 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { 993 if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) { 994 LLT Ty = MRI.getType(MI.getOperand(Idx).getReg()); 995 if (!Ty.isValid()) 996 continue; 997 auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]); 998 if (!Mapping->isValid()) 999 return getInvalidInstructionMapping(); 1000 1001 OpdsMapping[Idx] = Mapping; 1002 } 1003 } 1004 1005 return getInstructionMapping(DefaultMappingID, Cost, 1006 getOperandsMapping(OpdsMapping), NumOperands); 1007 } 1008