1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the RegisterBankInfo class for 10 /// AArch64. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64RegisterBankInfo.h" 15 #include "AArch64InstrInfo.h" 16 #include "AArch64RegisterInfo.h" 17 #include "MCTargetDesc/AArch64MCTargetDesc.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 21 #include "llvm/CodeGen/GlobalISel/RegisterBank.h" 22 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" 23 #include "llvm/CodeGen/GlobalISel/Utils.h" 24 #include "llvm/CodeGen/LowLevelType.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineOperand.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/TargetOpcodes.h" 30 #include "llvm/CodeGen/TargetRegisterInfo.h" 31 #include "llvm/CodeGen/TargetSubtargetInfo.h" 32 #include "llvm/IR/IntrinsicsAArch64.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include <algorithm> 35 #include <cassert> 36 37 #define GET_TARGET_REGBANK_IMPL 38 #include "AArch64GenRegisterBank.inc" 39 40 // This file will be TableGen'ed at some point. 41 #include "AArch64GenRegisterBankInfo.def" 42 43 using namespace llvm; 44 45 AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) 46 : AArch64GenRegisterBankInfo() { 47 static llvm::once_flag InitializeRegisterBankFlag; 48 49 static auto InitializeRegisterBankOnce = [&]() { 50 // We have only one set of register banks, whatever the subtarget 51 // is. Therefore, the initialization of the RegBanks table should be 52 // done only once. Indeed the table of all register banks 53 // (AArch64::RegBanks) is unique in the compiler. At some point, it 54 // will get tablegen'ed and the whole constructor becomes empty. 55 56 const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID); 57 (void)RBGPR; 58 assert(&AArch64::GPRRegBank == &RBGPR && 59 "The order in RegBanks is messed up"); 60 61 const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID); 62 (void)RBFPR; 63 assert(&AArch64::FPRRegBank == &RBFPR && 64 "The order in RegBanks is messed up"); 65 66 const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID); 67 (void)RBCCR; 68 assert(&AArch64::CCRegBank == &RBCCR && 69 "The order in RegBanks is messed up"); 70 71 // The GPR register bank is fully defined by all the registers in 72 // GR64all + its subclasses. 73 assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) && 74 "Subclass not added?"); 75 assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit"); 76 77 // The FPR register bank is fully defined by all the registers in 78 // GR64all + its subclasses. 79 assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) && 80 "Subclass not added?"); 81 assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) && 82 "Subclass not added?"); 83 assert(RBFPR.getSize() == 512 && 84 "FPRs should hold up to 512-bit via QQQQ sequence"); 85 86 assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) && 87 "Class not added?"); 88 assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit"); 89 90 // Check that the TableGen'ed like file is in sync we our expectations. 91 // First, the Idx. 92 assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR, 93 {PMI_GPR32, PMI_GPR64, PMI_GPR128}) && 94 "PartialMappingIdx's are incorrectly ordered"); 95 assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR, 96 {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128, 97 PMI_FPR256, PMI_FPR512}) && 98 "PartialMappingIdx's are incorrectly ordered"); 99 // Now, the content. 100 // Check partial mapping. 101 #define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \ 102 do { \ 103 assert( \ 104 checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \ 105 #Idx " is incorrectly initialized"); \ 106 } while (false) 107 108 CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR); 109 CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR); 110 CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR); 111 CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR); 112 CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR); 113 CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR); 114 CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR); 115 CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR); 116 CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR); 117 118 // Check value mapping. 119 #define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \ 120 do { \ 121 assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \ 122 PartialMappingIdx::PMI_First##RBName, Size, \ 123 Offset) && \ 124 #RBName #Size " " #Offset " is incorrectly initialized"); \ 125 } while (false) 126 127 #define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0) 128 129 CHECK_VALUEMAP(GPR, 32); 130 CHECK_VALUEMAP(GPR, 64); 131 CHECK_VALUEMAP(GPR, 128); 132 CHECK_VALUEMAP(FPR, 16); 133 CHECK_VALUEMAP(FPR, 32); 134 CHECK_VALUEMAP(FPR, 64); 135 CHECK_VALUEMAP(FPR, 128); 136 CHECK_VALUEMAP(FPR, 256); 137 CHECK_VALUEMAP(FPR, 512); 138 139 // Check the value mapping for 3-operands instructions where all the operands 140 // map to the same value mapping. 141 #define CHECK_VALUEMAP_3OPS(RBName, Size) \ 142 do { \ 143 CHECK_VALUEMAP_IMPL(RBName, Size, 0); \ 144 CHECK_VALUEMAP_IMPL(RBName, Size, 1); \ 145 CHECK_VALUEMAP_IMPL(RBName, Size, 2); \ 146 } while (false) 147 148 CHECK_VALUEMAP_3OPS(GPR, 32); 149 CHECK_VALUEMAP_3OPS(GPR, 64); 150 CHECK_VALUEMAP_3OPS(GPR, 128); 151 CHECK_VALUEMAP_3OPS(FPR, 32); 152 CHECK_VALUEMAP_3OPS(FPR, 64); 153 CHECK_VALUEMAP_3OPS(FPR, 128); 154 CHECK_VALUEMAP_3OPS(FPR, 256); 155 CHECK_VALUEMAP_3OPS(FPR, 512); 156 157 #define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \ 158 do { \ 159 unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \ 160 unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \ 161 (void)PartialMapDstIdx; \ 162 (void)PartialMapSrcIdx; \ 163 const ValueMapping *Map = getCopyMapping( \ 164 AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \ 165 (void)Map; \ 166 assert(Map[0].BreakDown == \ 167 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ 168 Map[0].NumBreakDowns == 1 && #RBNameDst #Size \ 169 " Dst is incorrectly initialized"); \ 170 assert(Map[1].BreakDown == \ 171 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ 172 Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \ 173 " Src is incorrectly initialized"); \ 174 \ 175 } while (false) 176 177 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32); 178 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32); 179 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64); 180 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64); 181 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32); 182 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32); 183 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64); 184 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64); 185 186 #define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \ 187 do { \ 188 unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \ 189 unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \ 190 (void)PartialMapDstIdx; \ 191 (void)PartialMapSrcIdx; \ 192 const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \ 193 (void)Map; \ 194 assert(Map[0].BreakDown == \ 195 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \ 196 Map[0].NumBreakDowns == 1 && "FPR" #DstSize \ 197 " Dst is incorrectly initialized"); \ 198 assert(Map[1].BreakDown == \ 199 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \ 200 Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \ 201 " Src is incorrectly initialized"); \ 202 \ 203 } while (false) 204 205 CHECK_VALUEMAP_FPEXT(32, 16); 206 CHECK_VALUEMAP_FPEXT(64, 16); 207 CHECK_VALUEMAP_FPEXT(64, 32); 208 CHECK_VALUEMAP_FPEXT(128, 64); 209 210 assert(verify(TRI) && "Invalid register bank information"); 211 }; 212 213 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce); 214 } 215 216 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A, 217 const RegisterBank &B, 218 unsigned Size) const { 219 // What do we do with different size? 220 // copy are same size. 221 // Will introduce other hooks for different size: 222 // * extract cost. 223 // * build_sequence cost. 224 225 // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV. 226 // FIXME: This should be deduced from the scheduling model. 227 if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank) 228 // FMOVXDr or FMOVWSr. 229 return 5; 230 if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank) 231 // FMOVDXr or FMOVSWr. 232 return 4; 233 234 return RegisterBankInfo::copyCost(A, B, Size); 235 } 236 237 const RegisterBank & 238 AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, 239 LLT) const { 240 switch (RC.getID()) { 241 case AArch64::FPR8RegClassID: 242 case AArch64::FPR16RegClassID: 243 case AArch64::FPR16_loRegClassID: 244 case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID: 245 case AArch64::FPR32RegClassID: 246 case AArch64::FPR64RegClassID: 247 case AArch64::FPR64_loRegClassID: 248 case AArch64::FPR128RegClassID: 249 case AArch64::FPR128_loRegClassID: 250 case AArch64::DDRegClassID: 251 case AArch64::DDDRegClassID: 252 case AArch64::DDDDRegClassID: 253 case AArch64::QQRegClassID: 254 case AArch64::QQQRegClassID: 255 case AArch64::QQQQRegClassID: 256 return getRegBank(AArch64::FPRRegBankID); 257 case AArch64::GPR32commonRegClassID: 258 case AArch64::GPR32RegClassID: 259 case AArch64::GPR32spRegClassID: 260 case AArch64::GPR32sponlyRegClassID: 261 case AArch64::GPR32argRegClassID: 262 case AArch64::GPR32allRegClassID: 263 case AArch64::GPR64commonRegClassID: 264 case AArch64::GPR64RegClassID: 265 case AArch64::GPR64spRegClassID: 266 case AArch64::GPR64sponlyRegClassID: 267 case AArch64::GPR64argRegClassID: 268 case AArch64::GPR64allRegClassID: 269 case AArch64::GPR64noipRegClassID: 270 case AArch64::GPR64common_and_GPR64noipRegClassID: 271 case AArch64::GPR64noip_and_tcGPR64RegClassID: 272 case AArch64::tcGPR64RegClassID: 273 case AArch64::rtcGPR64RegClassID: 274 case AArch64::WSeqPairsClassRegClassID: 275 case AArch64::XSeqPairsClassRegClassID: 276 case AArch64::MatrixIndexGPR32_12_15RegClassID: 277 case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID: 278 return getRegBank(AArch64::GPRRegBankID); 279 case AArch64::CCRRegClassID: 280 return getRegBank(AArch64::CCRegBankID); 281 default: 282 llvm_unreachable("Register class not supported"); 283 } 284 } 285 286 RegisterBankInfo::InstructionMappings 287 AArch64RegisterBankInfo::getInstrAlternativeMappings( 288 const MachineInstr &MI) const { 289 const MachineFunction &MF = *MI.getParent()->getParent(); 290 const TargetSubtargetInfo &STI = MF.getSubtarget(); 291 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 292 const MachineRegisterInfo &MRI = MF.getRegInfo(); 293 294 switch (MI.getOpcode()) { 295 case TargetOpcode::G_OR: { 296 // 32 and 64-bit or can be mapped on either FPR or 297 // GPR for the same cost. 298 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 299 if (Size != 32 && Size != 64) 300 break; 301 302 // If the instruction has any implicit-defs or uses, 303 // do not mess with it. 304 if (MI.getNumOperands() != 3) 305 break; 306 InstructionMappings AltMappings; 307 const InstructionMapping &GPRMapping = getInstructionMapping( 308 /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size), 309 /*NumOperands*/ 3); 310 const InstructionMapping &FPRMapping = getInstructionMapping( 311 /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size), 312 /*NumOperands*/ 3); 313 314 AltMappings.push_back(&GPRMapping); 315 AltMappings.push_back(&FPRMapping); 316 return AltMappings; 317 } 318 case TargetOpcode::G_BITCAST: { 319 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 320 if (Size != 32 && Size != 64) 321 break; 322 323 // If the instruction has any implicit-defs or uses, 324 // do not mess with it. 325 if (MI.getNumOperands() != 2) 326 break; 327 328 InstructionMappings AltMappings; 329 const InstructionMapping &GPRMapping = getInstructionMapping( 330 /*ID*/ 1, /*Cost*/ 1, 331 getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size), 332 /*NumOperands*/ 2); 333 const InstructionMapping &FPRMapping = getInstructionMapping( 334 /*ID*/ 2, /*Cost*/ 1, 335 getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size), 336 /*NumOperands*/ 2); 337 const InstructionMapping &GPRToFPRMapping = getInstructionMapping( 338 /*ID*/ 3, 339 /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), 340 getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size), 341 /*NumOperands*/ 2); 342 const InstructionMapping &FPRToGPRMapping = getInstructionMapping( 343 /*ID*/ 3, 344 /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), 345 getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size), 346 /*NumOperands*/ 2); 347 348 AltMappings.push_back(&GPRMapping); 349 AltMappings.push_back(&FPRMapping); 350 AltMappings.push_back(&GPRToFPRMapping); 351 AltMappings.push_back(&FPRToGPRMapping); 352 return AltMappings; 353 } 354 case TargetOpcode::G_LOAD: { 355 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); 356 if (Size != 64) 357 break; 358 359 // If the instruction has any implicit-defs or uses, 360 // do not mess with it. 361 if (MI.getNumOperands() != 2) 362 break; 363 364 InstructionMappings AltMappings; 365 const InstructionMapping &GPRMapping = getInstructionMapping( 366 /*ID*/ 1, /*Cost*/ 1, 367 getOperandsMapping({getValueMapping(PMI_FirstGPR, Size), 368 // Addresses are GPR 64-bit. 369 getValueMapping(PMI_FirstGPR, 64)}), 370 /*NumOperands*/ 2); 371 const InstructionMapping &FPRMapping = getInstructionMapping( 372 /*ID*/ 2, /*Cost*/ 1, 373 getOperandsMapping({getValueMapping(PMI_FirstFPR, Size), 374 // Addresses are GPR 64-bit. 375 getValueMapping(PMI_FirstGPR, 64)}), 376 /*NumOperands*/ 2); 377 378 AltMappings.push_back(&GPRMapping); 379 AltMappings.push_back(&FPRMapping); 380 return AltMappings; 381 } 382 default: 383 break; 384 } 385 return RegisterBankInfo::getInstrAlternativeMappings(MI); 386 } 387 388 void AArch64RegisterBankInfo::applyMappingImpl( 389 const OperandsMapper &OpdMapper) const { 390 switch (OpdMapper.getMI().getOpcode()) { 391 case TargetOpcode::G_OR: 392 case TargetOpcode::G_BITCAST: 393 case TargetOpcode::G_LOAD: 394 // Those ID must match getInstrAlternativeMappings. 395 assert((OpdMapper.getInstrMapping().getID() >= 1 && 396 OpdMapper.getInstrMapping().getID() <= 4) && 397 "Don't know how to handle that ID"); 398 return applyDefaultMapping(OpdMapper); 399 default: 400 llvm_unreachable("Don't know how to handle that operation"); 401 } 402 } 403 404 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, 405 /// having only floating-point operands. 406 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { 407 switch (Opc) { 408 case TargetOpcode::G_FADD: 409 case TargetOpcode::G_FSUB: 410 case TargetOpcode::G_FMUL: 411 case TargetOpcode::G_FMA: 412 case TargetOpcode::G_FDIV: 413 case TargetOpcode::G_FCONSTANT: 414 case TargetOpcode::G_FPEXT: 415 case TargetOpcode::G_FPTRUNC: 416 case TargetOpcode::G_FCEIL: 417 case TargetOpcode::G_FFLOOR: 418 case TargetOpcode::G_FNEARBYINT: 419 case TargetOpcode::G_FNEG: 420 case TargetOpcode::G_FCOS: 421 case TargetOpcode::G_FSIN: 422 case TargetOpcode::G_FLOG10: 423 case TargetOpcode::G_FLOG: 424 case TargetOpcode::G_FLOG2: 425 case TargetOpcode::G_FSQRT: 426 case TargetOpcode::G_FABS: 427 case TargetOpcode::G_FEXP: 428 case TargetOpcode::G_FRINT: 429 case TargetOpcode::G_INTRINSIC_TRUNC: 430 case TargetOpcode::G_INTRINSIC_ROUND: 431 case TargetOpcode::G_FMAXNUM: 432 case TargetOpcode::G_FMINNUM: 433 case TargetOpcode::G_FMAXIMUM: 434 case TargetOpcode::G_FMINIMUM: 435 return true; 436 } 437 return false; 438 } 439 440 const RegisterBankInfo::InstructionMapping & 441 AArch64RegisterBankInfo::getSameKindOfOperandsMapping( 442 const MachineInstr &MI) const { 443 const unsigned Opc = MI.getOpcode(); 444 const MachineFunction &MF = *MI.getParent()->getParent(); 445 const MachineRegisterInfo &MRI = MF.getRegInfo(); 446 447 unsigned NumOperands = MI.getNumOperands(); 448 assert(NumOperands <= 3 && 449 "This code is for instructions with 3 or less operands"); 450 451 LLT Ty = MRI.getType(MI.getOperand(0).getReg()); 452 unsigned Size = Ty.getSizeInBits(); 453 bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc); 454 455 PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR; 456 457 #ifndef NDEBUG 458 // Make sure all the operands are using similar size and type. 459 // Should probably be checked by the machine verifier. 460 // This code won't catch cases where the number of lanes is 461 // different between the operands. 462 // If we want to go to that level of details, it is probably 463 // best to check that the types are the same, period. 464 // Currently, we just check that the register banks are the same 465 // for each types. 466 for (unsigned Idx = 1; Idx != NumOperands; ++Idx) { 467 LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg()); 468 assert( 469 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset( 470 RBIdx, OpTy.getSizeInBits()) == 471 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) && 472 "Operand has incompatible size"); 473 bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc); 474 (void)OpIsFPR; 475 assert(IsFPR == OpIsFPR && "Operand has incompatible type"); 476 } 477 #endif // End NDEBUG. 478 479 return getInstructionMapping(DefaultMappingID, 1, 480 getValueMapping(RBIdx, Size), NumOperands); 481 } 482 483 /// \returns true if a given intrinsic \p ID only uses and defines FPRs. 484 static bool isFPIntrinsic(unsigned ID) { 485 // TODO: Add more intrinsics. 486 switch (ID) { 487 default: 488 return false; 489 case Intrinsic::aarch64_neon_uaddlv: 490 return true; 491 } 492 } 493 494 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, 495 const MachineRegisterInfo &MRI, 496 const TargetRegisterInfo &TRI, 497 unsigned Depth) const { 498 unsigned Op = MI.getOpcode(); 499 if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID())) 500 return true; 501 502 // Do we have an explicit floating point instruction? 503 if (isPreISelGenericFloatingPointOpcode(Op)) 504 return true; 505 506 // No. Check if we have a copy-like instruction. If we do, then we could 507 // still be fed by floating point instructions. 508 if (Op != TargetOpcode::COPY && !MI.isPHI() && 509 !isPreISelGenericOptimizationHint(Op)) 510 return false; 511 512 // Check if we already know the register bank. 513 auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); 514 if (RB == &AArch64::FPRRegBank) 515 return true; 516 if (RB == &AArch64::GPRRegBank) 517 return false; 518 519 // We don't know anything. 520 // 521 // If we have a phi, we may be able to infer that it will be assigned a FPR 522 // based off of its inputs. 523 if (!MI.isPHI() || Depth > MaxFPRSearchDepth) 524 return false; 525 526 return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { 527 return Op.isReg() && 528 onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); 529 }); 530 } 531 532 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, 533 const MachineRegisterInfo &MRI, 534 const TargetRegisterInfo &TRI, 535 unsigned Depth) const { 536 switch (MI.getOpcode()) { 537 case TargetOpcode::G_FPTOSI: 538 case TargetOpcode::G_FPTOUI: 539 case TargetOpcode::G_FCMP: 540 case TargetOpcode::G_LROUND: 541 case TargetOpcode::G_LLROUND: 542 return true; 543 default: 544 break; 545 } 546 return hasFPConstraints(MI, MRI, TRI, Depth); 547 } 548 549 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, 550 const MachineRegisterInfo &MRI, 551 const TargetRegisterInfo &TRI, 552 unsigned Depth) const { 553 switch (MI.getOpcode()) { 554 case AArch64::G_DUP: 555 case TargetOpcode::G_SITOFP: 556 case TargetOpcode::G_UITOFP: 557 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 558 case TargetOpcode::G_INSERT_VECTOR_ELT: 559 case TargetOpcode::G_BUILD_VECTOR: 560 case TargetOpcode::G_BUILD_VECTOR_TRUNC: 561 return true; 562 default: 563 break; 564 } 565 return hasFPConstraints(MI, MRI, TRI, Depth); 566 } 567 568 const RegisterBankInfo::InstructionMapping & 569 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { 570 const unsigned Opc = MI.getOpcode(); 571 572 // Try the default logic for non-generic instructions that are either copies 573 // or already have some operands assigned to banks. 574 if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) || 575 Opc == TargetOpcode::G_PHI) { 576 const RegisterBankInfo::InstructionMapping &Mapping = 577 getInstrMappingImpl(MI); 578 if (Mapping.isValid()) 579 return Mapping; 580 } 581 582 const MachineFunction &MF = *MI.getParent()->getParent(); 583 const MachineRegisterInfo &MRI = MF.getRegInfo(); 584 const TargetSubtargetInfo &STI = MF.getSubtarget(); 585 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 586 587 switch (Opc) { 588 // G_{F|S|U}REM are not listed because they are not legal. 589 // Arithmetic ops. 590 case TargetOpcode::G_ADD: 591 case TargetOpcode::G_SUB: 592 case TargetOpcode::G_PTR_ADD: 593 case TargetOpcode::G_MUL: 594 case TargetOpcode::G_SDIV: 595 case TargetOpcode::G_UDIV: 596 // Bitwise ops. 597 case TargetOpcode::G_AND: 598 case TargetOpcode::G_OR: 599 case TargetOpcode::G_XOR: 600 // Floating point ops. 601 case TargetOpcode::G_FADD: 602 case TargetOpcode::G_FSUB: 603 case TargetOpcode::G_FMUL: 604 case TargetOpcode::G_FDIV: 605 case TargetOpcode::G_FMAXIMUM: 606 case TargetOpcode::G_FMINIMUM: 607 return getSameKindOfOperandsMapping(MI); 608 case TargetOpcode::G_FPEXT: { 609 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 610 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 611 return getInstructionMapping( 612 DefaultMappingID, /*Cost*/ 1, 613 getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()), 614 /*NumOperands*/ 2); 615 } 616 // Shifts. 617 case TargetOpcode::G_SHL: 618 case TargetOpcode::G_LSHR: 619 case TargetOpcode::G_ASHR: { 620 LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg()); 621 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 622 if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32) 623 return getInstructionMapping(DefaultMappingID, 1, 624 &ValMappings[Shift64Imm], 3); 625 return getSameKindOfOperandsMapping(MI); 626 } 627 case TargetOpcode::COPY: { 628 Register DstReg = MI.getOperand(0).getReg(); 629 Register SrcReg = MI.getOperand(1).getReg(); 630 // Check if one of the register is not a generic register. 631 if ((Register::isPhysicalRegister(DstReg) || 632 !MRI.getType(DstReg).isValid()) || 633 (Register::isPhysicalRegister(SrcReg) || 634 !MRI.getType(SrcReg).isValid())) { 635 const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI); 636 const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI); 637 if (!DstRB) 638 DstRB = SrcRB; 639 else if (!SrcRB) 640 SrcRB = DstRB; 641 // If both RB are null that means both registers are generic. 642 // We shouldn't be here. 643 assert(DstRB && SrcRB && "Both RegBank were nullptr"); 644 unsigned Size = getSizeInBits(DstReg, MRI, TRI); 645 return getInstructionMapping( 646 DefaultMappingID, copyCost(*DstRB, *SrcRB, Size), 647 getCopyMapping(DstRB->getID(), SrcRB->getID(), Size), 648 // We only care about the mapping of the destination. 649 /*NumOperands*/ 1); 650 } 651 // Both registers are generic, use G_BITCAST. 652 LLVM_FALLTHROUGH; 653 } 654 case TargetOpcode::G_BITCAST: { 655 LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); 656 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 657 unsigned Size = DstTy.getSizeInBits(); 658 bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64; 659 bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64; 660 const RegisterBank &DstRB = 661 DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; 662 const RegisterBank &SrcRB = 663 SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; 664 return getInstructionMapping( 665 DefaultMappingID, copyCost(DstRB, SrcRB, Size), 666 getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), 667 // We only care about the mapping of the destination for COPY. 668 /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1); 669 } 670 default: 671 break; 672 } 673 674 unsigned NumOperands = MI.getNumOperands(); 675 676 // Track the size and bank of each register. We don't do partial mappings. 677 SmallVector<unsigned, 4> OpSize(NumOperands); 678 SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands); 679 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { 680 auto &MO = MI.getOperand(Idx); 681 if (!MO.isReg() || !MO.getReg()) 682 continue; 683 684 LLT Ty = MRI.getType(MO.getReg()); 685 OpSize[Idx] = Ty.getSizeInBits(); 686 687 // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs. 688 // For floating-point instructions, scalars go in FPRs. 689 if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) || 690 Ty.getSizeInBits() > 64) 691 OpRegBankIdx[Idx] = PMI_FirstFPR; 692 else 693 OpRegBankIdx[Idx] = PMI_FirstGPR; 694 } 695 696 unsigned Cost = 1; 697 // Some of the floating-point instructions have mixed GPR and FPR operands: 698 // fine-tune the computed mapping. 699 switch (Opc) { 700 case AArch64::G_DUP: { 701 Register ScalarReg = MI.getOperand(1).getReg(); 702 LLT ScalarTy = MRI.getType(ScalarReg); 703 auto ScalarDef = MRI.getVRegDef(ScalarReg); 704 // s8 is an exception for G_DUP, which we always want on gpr. 705 if (ScalarTy.getSizeInBits() != 8 && 706 (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank || 707 onlyDefinesFP(*ScalarDef, MRI, TRI))) 708 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 709 else 710 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; 711 break; 712 } 713 case TargetOpcode::G_TRUNC: { 714 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 715 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) 716 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 717 break; 718 } 719 case TargetOpcode::G_SITOFP: 720 case TargetOpcode::G_UITOFP: { 721 if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 722 break; 723 // Integer to FP conversions don't necessarily happen between GPR -> FPR 724 // regbanks. They can also be done within an FPR register. 725 Register SrcReg = MI.getOperand(1).getReg(); 726 if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) 727 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 728 else 729 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; 730 break; 731 } 732 case TargetOpcode::G_FPTOSI: 733 case TargetOpcode::G_FPTOUI: 734 if (MRI.getType(MI.getOperand(0).getReg()).isVector()) 735 break; 736 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; 737 break; 738 case TargetOpcode::G_FCMP: { 739 // If the result is a vector, it must use a FPR. 740 AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 = 741 MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR 742 : PMI_FirstGPR; 743 OpRegBankIdx = {Idx0, 744 /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR}; 745 break; 746 } 747 case TargetOpcode::G_BITCAST: 748 // This is going to be a cross register bank copy and this is expensive. 749 if (OpRegBankIdx[0] != OpRegBankIdx[1]) 750 Cost = copyCost( 751 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank, 752 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank, 753 OpSize[0]); 754 break; 755 case TargetOpcode::G_LOAD: 756 // Loading in vector unit is slightly more expensive. 757 // This is actually only true for the LD1R and co instructions, 758 // but anyway for the fast mode this number does not matter and 759 // for the greedy mode the cost of the cross bank copy will 760 // offset this number. 761 // FIXME: Should be derived from the scheduling model. 762 if (OpRegBankIdx[0] != PMI_FirstGPR) { 763 Cost = 2; 764 break; 765 } 766 767 if (cast<GLoad>(MI).isAtomic()) { 768 // Atomics always use GPR destinations. Don't refine any further. 769 OpRegBankIdx[0] = PMI_FirstGPR; 770 break; 771 } 772 773 // Check if that load feeds fp instructions. 774 // In that case, we want the default mapping to be on FPR 775 // instead of blind map every scalar to GPR. 776 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 777 [&](const MachineInstr &UseMI) { 778 // If we have at least one direct use in a FP instruction, 779 // assume this was a floating point load in the IR. If it was 780 // not, we would have had a bitcast before reaching that 781 // instruction. 782 // 783 // Int->FP conversion operations are also captured in 784 // onlyDefinesFP(). 785 return onlyUsesFP(UseMI, MRI, TRI) || 786 onlyDefinesFP(UseMI, MRI, TRI); 787 })) 788 OpRegBankIdx[0] = PMI_FirstFPR; 789 break; 790 case TargetOpcode::G_STORE: 791 // Check if that store is fed by fp instructions. 792 if (OpRegBankIdx[0] == PMI_FirstGPR) { 793 Register VReg = MI.getOperand(0).getReg(); 794 if (!VReg) 795 break; 796 MachineInstr *DefMI = MRI.getVRegDef(VReg); 797 if (onlyDefinesFP(*DefMI, MRI, TRI)) 798 OpRegBankIdx[0] = PMI_FirstFPR; 799 break; 800 } 801 break; 802 case TargetOpcode::G_SELECT: { 803 // If the destination is FPR, preserve that. 804 if (OpRegBankIdx[0] != PMI_FirstGPR) 805 break; 806 807 // If we're taking in vectors, we have no choice but to put everything on 808 // FPRs, except for the condition. The condition must always be on a GPR. 809 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); 810 if (SrcTy.isVector()) { 811 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; 812 break; 813 } 814 815 // Try to minimize the number of copies. If we have more floating point 816 // constrained values than not, then we'll put everything on FPR. Otherwise, 817 // everything has to be on GPR. 818 unsigned NumFP = 0; 819 820 // Check if the uses of the result always produce floating point values. 821 // 822 // For example: 823 // 824 // %z = G_SELECT %cond %x %y 825 // fpr = G_FOO %z ... 826 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 827 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) 828 ++NumFP; 829 830 // Check if the defs of the source values always produce floating point 831 // values. 832 // 833 // For example: 834 // 835 // %x = G_SOMETHING_ALWAYS_FLOAT %a ... 836 // %z = G_SELECT %cond %x %y 837 // 838 // Also check whether or not the sources have already been decided to be 839 // FPR. Keep track of this. 840 // 841 // This doesn't check the condition, since it's just whatever is in NZCV. 842 // This isn't passed explicitly in a register to fcsel/csel. 843 for (unsigned Idx = 2; Idx < 4; ++Idx) { 844 Register VReg = MI.getOperand(Idx).getReg(); 845 MachineInstr *DefMI = MRI.getVRegDef(VReg); 846 if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank || 847 onlyDefinesFP(*DefMI, MRI, TRI)) 848 ++NumFP; 849 } 850 851 // If we have more FP constraints than not, then move everything over to 852 // FPR. 853 if (NumFP >= 2) 854 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; 855 856 break; 857 } 858 case TargetOpcode::G_UNMERGE_VALUES: { 859 // If the first operand belongs to a FPR register bank, then make sure that 860 // we preserve that. 861 if (OpRegBankIdx[0] != PMI_FirstGPR) 862 break; 863 864 LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg()); 865 // UNMERGE into scalars from a vector should always use FPR. 866 // Likewise if any of the uses are FP instructions. 867 if (SrcTy.isVector() || SrcTy == LLT::scalar(128) || 868 any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), 869 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) { 870 // Set the register bank of every operand to FPR. 871 for (unsigned Idx = 0, NumOperands = MI.getNumOperands(); 872 Idx < NumOperands; ++Idx) 873 OpRegBankIdx[Idx] = PMI_FirstFPR; 874 } 875 break; 876 } 877 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 878 // Destination and source need to be FPRs. 879 OpRegBankIdx[0] = PMI_FirstFPR; 880 OpRegBankIdx[1] = PMI_FirstFPR; 881 882 // Index needs to be a GPR. 883 OpRegBankIdx[2] = PMI_FirstGPR; 884 break; 885 case TargetOpcode::G_INSERT_VECTOR_ELT: 886 OpRegBankIdx[0] = PMI_FirstFPR; 887 OpRegBankIdx[1] = PMI_FirstFPR; 888 889 // The element may be either a GPR or FPR. Preserve that behaviour. 890 if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank) 891 OpRegBankIdx[2] = PMI_FirstFPR; 892 else 893 OpRegBankIdx[2] = PMI_FirstGPR; 894 895 // Index needs to be a GPR. 896 OpRegBankIdx[3] = PMI_FirstGPR; 897 break; 898 case TargetOpcode::G_EXTRACT: { 899 // For s128 sources we have to use fpr unless we know otherwise. 900 auto Src = MI.getOperand(1).getReg(); 901 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 902 if (SrcTy.getSizeInBits() != 128) 903 break; 904 auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass 905 ? PMI_FirstGPR 906 : PMI_FirstFPR; 907 OpRegBankIdx[0] = Idx; 908 OpRegBankIdx[1] = Idx; 909 break; 910 } 911 case TargetOpcode::G_BUILD_VECTOR: { 912 // If the first source operand belongs to a FPR register bank, then make 913 // sure that we preserve that. 914 if (OpRegBankIdx[1] != PMI_FirstGPR) 915 break; 916 Register VReg = MI.getOperand(1).getReg(); 917 if (!VReg) 918 break; 919 920 // Get the instruction that defined the source operand reg, and check if 921 // it's a floating point operation. Or, if it's a type like s16 which 922 // doesn't have a exact size gpr register class. The exception is if the 923 // build_vector has all constant operands, which may be better to leave as 924 // gpr without copies, so it can be matched in imported patterns. 925 MachineInstr *DefMI = MRI.getVRegDef(VReg); 926 unsigned DefOpc = DefMI->getOpcode(); 927 const LLT SrcTy = MRI.getType(VReg); 928 if (all_of(MI.operands(), [&](const MachineOperand &Op) { 929 return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == 930 TargetOpcode::G_CONSTANT; 931 })) 932 break; 933 if (isPreISelGenericFloatingPointOpcode(DefOpc) || 934 SrcTy.getSizeInBits() < 32 || 935 getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) { 936 // Have a floating point op. 937 // Make sure every operand gets mapped to a FPR register class. 938 unsigned NumOperands = MI.getNumOperands(); 939 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) 940 OpRegBankIdx[Idx] = PMI_FirstFPR; 941 } 942 break; 943 } 944 case TargetOpcode::G_VECREDUCE_FADD: 945 case TargetOpcode::G_VECREDUCE_FMUL: 946 case TargetOpcode::G_VECREDUCE_FMAX: 947 case TargetOpcode::G_VECREDUCE_FMIN: 948 case TargetOpcode::G_VECREDUCE_ADD: 949 case TargetOpcode::G_VECREDUCE_MUL: 950 case TargetOpcode::G_VECREDUCE_AND: 951 case TargetOpcode::G_VECREDUCE_OR: 952 case TargetOpcode::G_VECREDUCE_XOR: 953 case TargetOpcode::G_VECREDUCE_SMAX: 954 case TargetOpcode::G_VECREDUCE_SMIN: 955 case TargetOpcode::G_VECREDUCE_UMAX: 956 case TargetOpcode::G_VECREDUCE_UMIN: 957 // Reductions produce a scalar value from a vector, the scalar should be on 958 // FPR bank. 959 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; 960 break; 961 case TargetOpcode::G_VECREDUCE_SEQ_FADD: 962 case TargetOpcode::G_VECREDUCE_SEQ_FMUL: 963 // These reductions also take a scalar accumulator input. 964 // Assign them FPR for now. 965 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; 966 break; 967 case TargetOpcode::G_INTRINSIC: { 968 // Check if we know that the intrinsic has any constraints on its register 969 // banks. If it does, then update the mapping accordingly. 970 unsigned ID = MI.getIntrinsicID(); 971 unsigned Idx = 0; 972 if (!isFPIntrinsic(ID)) 973 break; 974 for (const auto &Op : MI.explicit_operands()) { 975 if (Op.isReg()) 976 OpRegBankIdx[Idx] = PMI_FirstFPR; 977 ++Idx; 978 } 979 break; 980 } 981 case TargetOpcode::G_LROUND: 982 case TargetOpcode::G_LLROUND: { 983 // Source is always floating point and destination is always integer. 984 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; 985 break; 986 } 987 } 988 989 // Finally construct the computed mapping. 990 SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); 991 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { 992 if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) { 993 auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]); 994 if (!Mapping->isValid()) 995 return getInvalidInstructionMapping(); 996 997 OpdsMapping[Idx] = Mapping; 998 } 999 } 1000 1001 return getInstructionMapping(DefaultMappingID, Cost, 1002 getOperandsMapping(OpdsMapping), NumOperands); 1003 } 1004