//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
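  // For example, with Size == 4 the scaled helpers above accept unsigned
  // byte offsets that are multiples of 4 up to 4095 * 4, so (add x1, #4092)
  // yields Base = x1 and OffImm = 1023, while a misaligned offset such as
  // #4094 is left to the SelectAddrModeUnscaled variants.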
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }

  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    }

    return false;
  }

  template<MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }
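  // For example, for an nxv8i16 shift the splatted amount is matched against
  // the range [1, 16] (Low = 1, High = element width in bits), with
  // saturation of larger amounts permitted via AllowSaturation.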
  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template<signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();
      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using a list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);

  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
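  // For example, a constant offset that folds into the reg+imm mode above
  // (in units of the vector length) lets a structured SVE load select as
  //   ld1w { z0.s }, p0/z, [x0, #1, mul vl]
  // instead of materializing x0 + VL in a separate add.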
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  template <unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, Scale, Vector, Offset);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);
  bool tryHighFPExt(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned Scale, SDValue &Vector,
                          SDValue &Offset);

  bool SelectAllActivePredicate(SDValue N);
};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the zero-extended value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the value is a constant
// operand. If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
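// For example, #0x555 selects as Val = 0x555 with Shift = LSL #0, and
// #0x555000 as Val = 0x555 with Shift = LSL #12; a value such as #0x555001
// fits neither form and is rejected, so the caller materializes it instead.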
/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match
  // under those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth it to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}
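// For example, on a subtarget with LSLFast, (add x0, (shl x1, #3)) whose
// users are all loads or stores is folded into the [x0, x1, lsl #3]
// addressing mode even with multiple memory uses; without LSLFast the fold
// only happens for a single use or when optimizing for size.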
/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}
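// For example, (and x1, 0xff) maps to UXTB and (sign_extend_inreg x1, i16)
// to SXTH for arithmetic operands, but with IsLoadStore == true only the
// 32-bit UXTW/SXTW forms remain valid and the narrower masks yield
// InvalidShiftExtend.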
// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is
// a high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {
  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}
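// For example, mla v0.4h, v1.4h, v2.h[7] can read lane 7 directly from the
// 128-bit v2, so recognizing this pattern here avoids emitting a separate
// extract of the high half before the multiply-accumulate.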
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
    // isDef32 as a heuristic for when the operand is likely to be a 32-bit
    // def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}
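// For example, (add x0, (shl (sext_inreg x1, i8), #2)) becomes
// "add x0, x0, w1, sxtb #2": the extend turns into the SXTB modifier, the
// shift amount (at most 4) becomes the immediate, and narrowIfNeeded
// provides the GPR32 view of x1 that the instruction requires.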
/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point
/// in folding it into the mem op. Theoretically, it shouldn't matter, but
/// there's a single pseudo-instruction for an ADRP/ADD pair so over-aggressive
/// folding leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address. The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N,
                                                        bool IsSignedImm,
                                                        unsigned BW,
                                                        unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // Unsigned immediate.
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //   add x0, Xbase, #offset
  //   stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
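// For example, with BW == 7 and Size == 8 (an LDP/STP of X registers) the
// accepted byte offsets are the multiples of 8 in [-512, 504], which are
// emitted as scaled immediates in [-64, 63].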
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base,
                                                SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //   add x0, Xbase, #offset
  //   ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
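// For example, (load (add x1, #32)) with Size == 8 selects as Base = x1 and
// OffImm = 4 (offsets are stored pre-scaled, so this prints as
// "ldr x0, [x1, #32]"); an offset of #33 is rejected here and left to
// SelectAddrModeUnscaled below.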
/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}
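// For example, with Size == 4 and WantExtend == true,
// (shl (sext_inreg x1, i32), #2) produces Offset = w1 and SignExtend = 1,
// which the callers below fold into the [base, w1, sxtw #2] form; any shift
// amount other than 0 or log2(Size) is rejected.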
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better
  // lowered to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
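// For example, isPreferredADD(0xfff) and isPreferredADD(0x123000) return
// true (a plain ADD and an "ADD ... LSL #12" respectively), while
// isPreferredADD(0x3000) returns false because a single MOVZ covers it more
// cheaply.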
bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. In that case we would use the [BaseReg + 0] address mode and
  // generate instructions like:
  //   MOV  X0, WideImmediate
  //   ADD  X1, BaseReg, X0
  //   LDR  X2, [X1, 0]
  // For such a situation, using the [BaseReg, XReg] addressing mode instead
  // saves one ADD/SUB:
  //   MOV  X0, WideImmediate
  //   LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip if the immediate can be selected by the load/store addressing
    // mode. Also skip if it can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}
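// For example, createQTuple with three vectors returns a REG_SEQUENCE of
// the QQQ register class built from qsub0..qsub2, forcing the allocator to
// place the list in consecutive Q registers as ld3/tbl-style instructions
// require.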
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's
  // just a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}
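// For example, a two-source table lookup becomes
//   tbl v0.16b, { v1.16b, v2.16b }, v3.16b
// once the table operands are packed into a QQ tuple above; for the tbx
// (isExt) variants the accumulator operand is additionally passed first.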
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
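// For example, a pre-indexed sign-extending i16 load to i64 selects to
// LDRSHXpre, whose result 0 is the written-back base, result 1 the loaded
// value, and result 2 the chain; this is why the ReplaceUses calls above
// permute the result numbers.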
void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT,
                                               SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
  // because it's too simple to have needed special treatment during lowering.
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
    MachineMemOperand *MemOp = MemIntr->getMemOperand();
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
  }

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list.
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
                  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT,
                                                 SuperReg));

  // Update the chain.
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
                                              unsigned Opc_ri,
                                              const SDValue &OldBase,
                                              const SDValue &OldOffset,
                                              unsigned Scale) {
  SDValue NewBase = OldBase;
  SDValue NewOffset = OldOffset;
  // Detect a possible Reg+Imm addressing mode.
  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
      N, OldBase, NewBase, NewOffset);

  // Detect a possible reg+reg addressing mode, but only if we haven't already
  // detected a Reg+Imm one.
  const bool IsRegReg =
      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);

  // Select the instruction.
  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
}
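// For example, findAddrModeSVELoadStore above first tries the reg+imm form
// ([x0, #imm, mul vl] with imm in [-8, 7]); only if that fails does it try
// reg+reg ([x0, x1, lsl #scale]), and otherwise the plain base with a zero
// offset is used.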
  SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
                   Base,                          // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
  unsigned Opc;
  SDValue Offset, Base;
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
      CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
                   Base,                               // address
                   Offset,                             // offset
                   N->getOperand(0)};                  // chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  ReplaceNode(N, St);
}

bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
                                                      SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();

  // Try to match it for the frame address
  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
    int FI = FINode->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  return false;
}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
  SDValue Ops[] = {RegSeq,
                   N->getOperand(NumVecs + 1), // base register
                   N->getOperand(NumVecs + 2), // Incremental
                   N->getOperand(0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  ReplaceNode(N, St);
}

namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace

/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}

void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;
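  // Lane operations are only encoded on Q registers, so 64-bit operands are
  // widened first. For illustration (hypothetical types): a v2i32 operand
  // becomes the dsub half of an IMPLICIT_DEF v4i32 via WidenVector, and each
  // result is shrunk back with NarrowVector once the load is selected.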
  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),          // Base register
                   N->getOperand(NumVecs + 3),          // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be an AND operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we could test the type of VT and return false when it does not
  // match, but since that check is done prior to this call in the current
  // context, we turned it into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching a bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t AndImm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
    return false;

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that.
  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (AndImm & (AndImm + 1))
    return false;

  bool ClampMSB = false;
  uint64_t SrlImm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
                                   SrlImm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(0).getOperand(0);

    // Use the type of the SRL node.
    VT = Opd0->getValueType(0);
  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
    Opd0 = Op0->getOperand(0);
    ClampMSB = (VT == MVT::i32);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one,
    // plus it may expose more opportunities for the bitfield insert pattern.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Bail out on large immediates. This happens when no proper
  // combining/constant folding was performed.
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  LSB = SrlImm;
  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
                                 : countTrailingOnes<uint64_t>(AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead
    // of the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}

static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(0);
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(0);
    VT = Op->getValueType(0);
    BitWidth = VT.getSizeInBits();
  }

  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern, which basically extracts
  // several contiguous bits from the source value and places them at the LSB
  // of the destination value; all other bits of the destination value are set
  // to zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm used to compute the bit width.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
  //
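  // A worked illustration (hypothetical values, i64): for
  //   (srl (and x, 0xff0), 4)
  // AndMask >> SrlImm is 0xff, so BitWide = 8, LSB = 4 and
  // MSB = 8 + 4 - 1 = 11, i.e. UBFMXri x, 4, 11, which extracts bits [11:4]
  // of x into the low byte of the result.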
  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  // Check whether we really have a several-bits extract here.
  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
  if (BitWide && isMask_64(AndMask >> SrlImm)) {
    if (N->getValueType(0) == MVT::i32)
      Opc = AArch64::UBFMWri;
    else
      Opc = AArch64::UBFMXri;

    LSB = SrlImm;
    MSB = BitWide + SrlImm - 1;
    return true;
  }

  return false;
}

static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we could test the type of VT and return false when it does not
  // match, but since that check is done prior to this call in the current
  // context, we turned it into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
    return true;

  // We're looking for a shift of a shift.
  uint64_t ShlImm = 0;
  uint64_t TruncBits = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of a truncate. A truncate from i64 to i32
    // can be considered as setting the high 32 bits to zero. Our strategy
    // here is to always generate a 64-bit UBFM. This consistency will help
    // the CSE pass later find more redundancy.
    Opd0 = N->getOperand(0).getOperand(0);
    TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0.getValueType();
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (ShlImm >= VT.getSizeInBits()) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  int immr = SrlImm - ShlImm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
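  // A worked illustration (hypothetical values, i64): for
  //   (srl (shl x, 24), 16)
  // immr = 16 - 24 = -8, so Immr = -8 + 64 = 56 and
  // Imms = 64 - 24 - 0 - 1 = 39, giving UBFMXri x, 56, 39: the low 40 bits
  // of x placed at bit 8, which is exactly what the two shifts compute.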
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}

bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND);

  EVT VT = N->getValueType(0);
  EVT NarrowVT = N->getOperand(0)->getValueType(0);
  if (VT != MVT::i64 || NarrowVT != MVT::i32)
    return false;

  uint64_t ShiftImm;
  SDValue Op = N->getOperand(0);
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  SDLoc dl(N);
  // Extend the incoming operand of the shift to 64-bits.
  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
  unsigned Immr = ShiftImm;
  unsigned Imms = NarrowVT.getSizeInBits() - 1;
  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
  return true;
}

/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
/// extract of a subvector.
bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
  assert(N->getOpcode() == ISD::FP_EXTEND);

  // There are 2 forms of fcvtl2 - extend to double or extend to float.
  SDValue Extract = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT NarrowVT = Extract.getValueType();
  if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
      (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
    return false;

  // Optionally look past a bitcast.
  Extract = peekThroughBitcasts(Extract);
  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // Match extract from start of high half index.
  // Example: v8i16 -> v4i16 means the extract must begin at index 4.
  unsigned ExtractIndex = Extract.getConstantOperandVal(1);
  if (ExtractIndex != Extract.getValueType().getVectorNumElements())
    return false;
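  // For illustration (hypothetical DAG): a v2f64 fp_extend of
  // (extract_subvector (v4f32 X), 2) reads the high half of X, so it can be
  // selected as the fcvtl2 form below and fed the full v4f32 register
  // directly, avoiding a separate move for the high-half extract.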
  auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
  CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
  return true;
}

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

  case ISD::SIGN_EXTEND_INREG:
    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  }

  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(0);
    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
    return true;
  }
  // Unreachable
  return false;
}

bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    return false;

  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // If the bit extract operation is 64-bit but the original type is 32-bit,
  // we need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};

    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                          MVT::i32, SDValue(BFM, 0), SubReg));
    return true;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
/// this asks whether DstMask zeroes precisely those bits that will be set by
/// the other half.
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
                              unsigned NumberOfIgnoredHighBits, EVT VT) {
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "i32 or i64 mask type expected!");
  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;

  APInt SignificantDstMask = APInt(BitWidth, DstMask);
  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);

  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
}
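// A worked illustration (hypothetical masks, i32): DstMask = 0xffff0000
// paired with BitsToBeInserted = 0x0000ffff is complementary (their AND is 0
// and their OR is all ones), so the pair is a BFI candidate; overlapping
// masks such as 0xffff00ff and 0x0000ffff are rejected.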
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they then live through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  getUsefulBits(Op, UsefulBits, Depth + 1);
}

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was starting at Imm in the argument
    OpUsefulBits <<= Imm;
  } else {
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was at zero in the argument
    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
                                  unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
}

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, Mask, Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask <<= ShiftAmt;
  } else
    return;

  UsefulBits &= Mask;
}

static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
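  // A worked illustration (hypothetical encoding, 32-bit): a BFM with
  // Imm (immr) = 28 and MSB (imms) = 3 is the alias BFI Wd, Wn, #4, #4;
  // the else branch below computes Width = MSB + 1 = 4 and
  // LSB = 32 - Imm = 4, matching the inserted field Wd[7:4].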
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  ResultUsefulBits.flipAllBits();
  APInt Mask(UsefulBits.getBitWidth(), 0);

  getUsefulBits(Op, ResultUsefulBits, Depth + 1);

  if (MSB >= Imm) {
    // The instruction is a BFXIL.
    uint64_t Width = MSB - Imm + 1;
    uint64_t LSB = Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;

    if (Op.getOperand(1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(0) == Orig)
      // Bits starting from LSB in the input contribute to the result.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  } else {
    // The instruction is a BFI.
    uint64_t Width = MSB + 1;
    uint64_t LSB = UsefulBits.getBitWidth() - Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;
    OpUsefulBits <<= LSB;

    if (Op.getOperand(1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(LSB);
    }

    if (Op.getOperand(0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  }

  UsefulBits &= Mask;
}

static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
                                SDValue Orig, unsigned Depth) {

  // Users of this node should have already been instruction selected
  // FIXME: Can we turn that into an assert?
  if (!UserNode->isMachineOpcode())
    return;

  switch (UserNode->getMachineOpcode()) {
  default:
    return;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDWri:
  case AArch64::ANDXri:
    // We increment Depth only when we call getUsefulBits
    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
                                             Depth);
  case AArch64::UBFMWri:
  case AArch64::UBFMXri:
    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);

  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
      getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
                                        Depth);
    return;
  case AArch64::BFMWri:
  case AArch64::BFMXri:
    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);

  case AArch64::STRBBui:
  case AArch64::STURBBi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
    return;

  case AArch64::STRHHui:
  case AArch64::STURHHi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
    return;
  }
}

static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return;
  // Initialize UsefulBits
  if (!Depth) {
    unsigned Bitwidth = Op.getScalarValueSizeInBits();
    // At the beginning, assume every produced bit is useful
    UsefulBits = APInt(Bitwidth, 0);
    UsefulBits.flipAllBits();
  }
  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);

  for (SDNode *Node : Op.getNode()->uses()) {
    // A use cannot produce useful bits
    APInt UsefulBitsForUse = APInt(UsefulBits);
    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
    UsersUsefulBits |= UsefulBitsForUse;
  }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point.
  UsefulBits &= UsersUsefulBits;
}

/// Create a machine node performing a notional SHL of Op by ShlAmount. If
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
/// 0, return Op unchanged.
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  if (ShlAmount == 0)
    return Op;

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

  SDNode *ShiftNode;
  if (ShlAmount > 0) {
    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op,
        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
  } else {
    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
    assert(ShlAmount < 0 && "expected right shift");
    int ShrAmount = -ShlAmount;
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
  }

  return SDValue(ShiftNode, 0);
}

/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)"?
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
                                    bool BiggerPattern,
                                    SDValue &Src, int &ShiftAmount,
                                    int &MaskWidth) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  (void)BitWidth;
  assert(BitWidth == 32 || BitWidth == 64);

  KnownBits Known = CurDAG->computeKnownBits(Op);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value
  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();

  // Discard a constant AND mask if present. It's safe because the node will
  // already have been factored into the computeKnownBits calculation above.
  uint64_t AndImm;
  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
    assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
    Op = Op.getOperand(0);
  }

  // Don't match if the SHL has more than one use, since then we'll end up
  // generating SHL+UBFIZ instead of just keeping SHL+AND.
  if (!BiggerPattern && !Op.hasOneUse())
    return false;

  uint64_t ShlImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
    return false;
  Op = Op.getOperand(0);

  if (!isShiftedMask_64(NonZeroBits))
    return false;

  ShiftAmount = countTrailingZeros(NonZeroBits);
  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
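  // A worked illustration (hypothetical values, i32): for
  //   (and (shl x, 3), 0x78)
  // NonZeroBits is 0x78, a shifted mask, so ShiftAmount = 3 and
  // MaskWidth = 4; since ShlImm == ShiftAmount, Src stays x and the whole
  // tree behaves like a UBFIZ of a 4-bit field at bit 3.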
  // BFI encompasses sufficiently many nodes that it's worth inserting an
  // extra LSL/LSR if the mask in NonZeroBits doesn't quite match up with the
  // ISD::SHL amount. BiggerPattern is true when this pattern is being matched
  // for BFI, and false when it is being matched for UBFIZ, in which case it
  // is not profitable to insert an extra shift.
  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
    return false;
  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);

  return true;
}

static bool isShiftedMask(uint64_t Mask, EVT VT) {
  assert(VT == MVT::i32 || VT == MVT::i64);
  if (VT == MVT::i32)
    return isShiftedMask_32(Mask);
  return isShiftedMask_64(Mask);
}

// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
// inserted only sets known zero bits.
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expected an OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  uint64_t OrImm;
  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
    return false;

  // Skip this transformation if the ORR immediate can be encoded in the ORR.
  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
  // performance neutral.
  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
    return false;

  uint64_t MaskImm;
  SDValue And = N->getOperand(0);
  // Must be a single use AND with an immediate operand.
  if (!And.hasOneUse() ||
      !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
    return false;

  // Compute the Known Zero for the AND as this allows us to catch more
  // general cases than just looking for AND with imm.
  KnownBits Known = CurDAG->computeKnownBits(And);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value.
  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();

  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
    return false;

  // The bits being inserted must only set those bits that are known to be
  // zero.
  if ((OrImm & NotKnownZero) != 0) {
    // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
    // currently handle this case.
    return false;
  }

  // BFI/BFXIL dst, src, #lsb, #width.
  int LSB = countTrailingOnes(NotKnownZero);
  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();

  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
  unsigned ImmR = (BitWidth - LSB) % BitWidth;
  unsigned ImmS = Width - 1;

  // If we're creating a BFI instruction, avoid cases where we need more
  // instructions to materialize the BFI constant as compared to the original
  // ORR. A BFXIL will use the same constant as the original ORR, so the code
  // should be no worse in this case.
  bool IsBFI = LSB != 0;
  uint64_t BFIImm = OrImm >> LSB;
  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
    // We have a BFI instruction and we know the constant can't be
    // materialized with an ORR-immediate with the zero register.
    unsigned OrChunks = 0, BFIChunks = 0;
    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
      if (((OrImm >> Shift) & 0xFFFF) != 0)
        ++OrChunks;
      if (((BFIImm >> Shift) & 0xFFFF) != 0)
        ++BFIChunks;
    }
    if (BFIChunks > OrChunks)
      return false;
  }

  // Materialize the constant to be inserted.
  SDLoc DL(N);
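  // A worked illustration (hypothetical constants, i32): for
  //   (or (and x, 0xffff00ff), 0x5700)
  // Known.Zero covers 0x0000ff00, so LSB = 8 and Width = 8; the transform
  // materializes BFIImm = 0x57 with MOVi32imm and emits BFMWri (the alias
  // BFI Wd, Ws, #8, #8) instead of a separate MOV of 0x5700 plus AND and ORR.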
  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  SDNode *MOVI = CurDAG->getMachineNode(
      MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));

  // Create the BFI/BFXIL instruction.
  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
                   CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expected an OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();

  // Given an OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same job, see
  //                       isBitfieldExtractOp)
  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion; we may want
      // to widen the pattern if we want to grab the general bitfield move
      // case.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand.
    // This allows us to catch more general cases than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one
    APInt BitsToBeInserted =
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand
    uint64_t Imm;
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits
      Dst = OrOpd1Val;

    // Both parts match
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(0);
  SDValue And1 = N->getOperand(1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask0Imm, VT)) {
      std::swap(And0, And1);
      std::swap(Mask0Imm, Mask1Imm);
    }

    SDValue Src = And1->getOperand(0);
    SDValue Dst = And0->getOperand(0);
    unsigned LSB = countTrailingZeros(Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
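    // A worked illustration (hypothetical masks, i32): for
    //   (or (and X, 0xfff0000f), (and Y, 0x000ffff0))
    // Mask1Imm = 0x000ffff0 is the shifted mask, so LSB = 4 and Width = 16;
    // Y is first shifted right by 4 and the BFM below then copies its low
    // 16 bits into bits [19:4] of X.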
    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  return false;
}

bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
  if (N->getOpcode() != ISD::OR)
    return false;

  APInt NUsefulBits;
  getUsefulBits(SDValue(N, 0), NUsefulBits);

  // If no bits are useful, just return UNDEF.
  if (!NUsefulBits) {
    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
    return true;
  }

  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
    return true;

  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
}

/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
/// equivalent of a left shift by a constant amount followed by an and masking
/// out a contiguous set of bits.
bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
  if (N->getOpcode() != ISD::AND)
    return false;

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  SDValue Op0;
  int DstLSB, Width;
  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
                               Op0, DstLSB, Width))
    return false;

  // ImmR is the rotate right amount.
  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
  // ImmS is the most significant bit of the source to be moved.
  unsigned ImmS = Width - 1;

  SDLoc DL(N);
  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;
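  // For illustration (hypothetical DAG, i64): the variable shifts only read
  // the low 6 bits of the amount, so (srl x, (and y, 63)) can be selected as
  // LSRVXr x, y with the AND dropped, and (shl x, (add y, 64)) as
  // LSLVXr x, y, since adding a multiple of the size is a no-op modulo 64.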
  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by
      // X to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by
      // ~X to generate a NOT instead of a SUB from a constant.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Not =
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers
    // the bits that are implicitly ANDed off by the above opcodes and, if so,
    // skip the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
      return false;

    if (countTrailingOnes(MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(0);
  }
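  // For illustration (hypothetical DAG, i64): (srl x, (sub 64, y)) takes the
  // NEG path above, becoming SUBXrr XZR, y followed by LSRVXr, while
  // (srl x, (sub 63, y)) matches the N == -1 mod Size case and uses
  // ORNXrr XZR, y (a NOT of y); both rely on the hardware's modulo-64
  // treatment of the shift amount.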
  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(
        AArch64::SUBREG_TO_REG, DL, VT,
        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                              unsigned RegWidth) {
  APFloat FVal(0.0);
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    FVal = CN->getValueAPF();
  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
    // Some otherwise illegal constants are allowed in this case.
    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
      return false;

    ConstantPoolSDNode *CN =
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
  } else
    return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  // fbits is between 1 and 64 in the worst case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
  return true;
}

// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
// integer values of its fields, and combines them into the single value used
// as the operand of the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() == 1)
    return -1;

  assert(Fields.size() == 5
            && "Invalid number of fields in read register string");

  SmallVector<int, 5> Ops;
  bool AllIntFields = true;

  for (StringRef Field : Fields) {
    unsigned IntField;
    AllIntFields &= !Field.getAsInteger(10, IntField);
    Ops.push_back(IntField);
  }

  assert(AllIntFields &&
          "Unexpected non-integer value in special register string.");
  (void)AllIntFields;
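  // A worked illustration (hypothetical register string): "3:0:4:2:2" splits
  // into o0=3, op1=0, CRn=4, CRm=2, op2=2, which packs to
  // (3 << 14) | (0 << 11) | (4 << 7) | (2 << 3) | 2 = 0xC212.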
  // Need to combine the integer fields of the string into a single value
  // based on the bit encoding of the MRS/MSR instruction.
  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
         (Ops[3] << 3) | (Ops[4]);
}

// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  int Reg = getIntOperandFromRegisterString(RegString->getString());
  if (Reg != -1) {
    ReplaceNode(N, CurDAG->getMachineNode(
                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                       N->getOperand(0)));
    return true;
  }

  // Use the sysreg mapper to map the remaining possible strings to the
  // value for the register to be used for the instruction operand.
  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
  if (TheReg && TheReg->Readable &&
      TheReg->haveFeatures(Subtarget->getFeatureBits()))
    Reg = TheReg->Encoding;
  else
    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());

  if (Reg != -1) {
    ReplaceNode(N, CurDAG->getMachineNode(
                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                       N->getOperand(0)));
    return true;
  }

  if (RegString->getString() == "pc") {
    ReplaceNode(N, CurDAG->getMachineNode(
                       AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
                       CurDAG->getTargetConstant(0, DL, MVT::i32),
                       N->getOperand(0)));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  int Reg = getIntOperandFromRegisterString(RegString->getString());
  if (Reg != -1) {
    ReplaceNode(
        N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                  N->getOperand(2), N->getOperand(0)));
    return true;
  }
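  // For illustration (hypothetical IR): a named system register such as
  // "tpidr_el0" misses the numeric form above and is resolved through the
  // sysreg mapper further down, while a pstate field like "pan" is matched
  // by the lookup below and selected as an MSR (immediate) pseudo.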
3115 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); 3116 if (PMapper) { 3117 assert (isa<ConstantSDNode>(N->getOperand(2)) 3118 && "Expected a constant integer expression."); 3119 unsigned Reg = PMapper->Encoding; 3120 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 3121 unsigned State; 3122 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { 3123 assert(Immed < 2 && "Bad imm"); 3124 State = AArch64::MSRpstateImm1; 3125 } else { 3126 assert(Immed < 16 && "Bad imm"); 3127 State = AArch64::MSRpstateImm4; 3128 } 3129 ReplaceNode(N, CurDAG->getMachineNode( 3130 State, DL, MVT::Other, 3131 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3132 CurDAG->getTargetConstant(Immed, DL, MVT::i16), 3133 N->getOperand(0))); 3134 return true; 3135 } 3136 3137 // Use the sysreg mapper to attempt to map the remaining possible strings 3138 // to the value for the register to be used for the MSR (register) 3139 // instruction operand. 3140 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3141 if (TheReg && TheReg->Writeable && 3142 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3143 Reg = TheReg->Encoding; 3144 else 3145 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3146 if (Reg != -1) { 3147 ReplaceNode(N, CurDAG->getMachineNode( 3148 AArch64::MSR, DL, MVT::Other, 3149 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3150 N->getOperand(2), N->getOperand(0))); 3151 return true; 3152 } 3153 3154 return false; 3155 } 3156 3157 /// We've got special pseudo-instructions for these 3158 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3159 unsigned Opcode; 3160 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3161 3162 // Leave IR for LSE if subtarget supports it. 3163 if (Subtarget->hasLSE()) return false; 3164 3165 if (MemTy == MVT::i8) 3166 Opcode = AArch64::CMP_SWAP_8; 3167 else if (MemTy == MVT::i16) 3168 Opcode = AArch64::CMP_SWAP_16; 3169 else if (MemTy == MVT::i32) 3170 Opcode = AArch64::CMP_SWAP_32; 3171 else if (MemTy == MVT::i64) 3172 Opcode = AArch64::CMP_SWAP_64; 3173 else 3174 llvm_unreachable("Unknown AtomicCmpSwap type"); 3175 3176 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32; 3177 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3178 N->getOperand(0)}; 3179 SDNode *CmpSwap = CurDAG->getMachineNode( 3180 Opcode, SDLoc(N), 3181 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3182 3183 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3184 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3185 3186 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3187 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3188 CurDAG->RemoveDeadNode(N); 3189 3190 return true; 3191 } 3192 3193 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, 3194 SDValue &Shift) { 3195 if (!isa<ConstantSDNode>(N)) 3196 return false; 3197 3198 SDLoc DL(N); 3199 uint64_t Val = cast<ConstantSDNode>(N) 3200 ->getAPIntValue() 3201 .trunc(VT.getFixedSizeInBits()) 3202 .getZExtValue(); 3203 3204 switch (VT.SimpleTy) { 3205 case MVT::i8: 3206 // All immediates are supported. 3207 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3208 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3209 return true; 3210 case MVT::i16: 3211 case MVT::i32: 3212 case MVT::i64: 3213 // Support 8bit unsigned immediates. 
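// For example, Val == 200 is encoded as-is with no shift, as in
// "add z0.h, z0.h, #200" (register choice illustrative).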
3214 if (Val <= 255) { 3215 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3216 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32); 3217 return true; 3218 } 3219 // Support 16bit unsigned immediates that are a multiple of 256. 3220 if (Val <= 65280 && Val % 256 == 0) { 3221 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3222 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32); 3223 return true; 3224 } 3225 break; 3226 default: 3227 break; 3228 } 3229 3230 return false; 3231 } 3232 3233 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, 3234 SDValue &Shift) { 3235 if (!isa<ConstantSDNode>(N)) 3236 return false; 3237 3238 SDLoc DL(N); 3239 int64_t Val = cast<ConstantSDNode>(N) 3240 ->getAPIntValue() 3241 .trunc(VT.getFixedSizeInBits()) 3242 .getSExtValue(); 3243 3244 switch (VT.SimpleTy) { 3245 case MVT::i8: 3246 // All immediates are supported. 3247 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3248 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3249 return true; 3250 case MVT::i16: 3251 case MVT::i32: 3252 case MVT::i64: 3253 // Support 8bit signed immediates. 3254 if (Val >= -128 && Val <= 127) { 3255 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3256 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); 3257 return true; 3258 } 3259 // Support 16bit signed immediates that are a multiple of 256. 3260 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { 3261 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3262 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); 3263 return true; 3264 } 3265 break; 3266 default: 3267 break; 3268 } 3269 3270 return false; 3271 } 3272 3273 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3274 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3275 int64_t ImmVal = CNode->getSExtValue(); 3276 SDLoc DL(N); 3277 if (ImmVal >= -128 && ImmVal < 128) { 3278 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3279 return true; 3280 } 3281 } 3282 return false; 3283 } 3284 3285 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3286 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3287 uint64_t ImmVal = CNode->getZExtValue(); 3288 3289 switch (VT.SimpleTy) { 3290 case MVT::i8: 3291 ImmVal &= 0xFF; 3292 break; 3293 case MVT::i16: 3294 ImmVal &= 0xFFFF; 3295 break; 3296 case MVT::i32: 3297 ImmVal &= 0xFFFFFFFF; 3298 break; 3299 case MVT::i64: 3300 break; 3301 default: 3302 llvm_unreachable("Unexpected type"); 3303 } 3304 3305 if (ImmVal < 256) { 3306 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3307 return true; 3308 } 3309 } 3310 return false; 3311 } 3312 3313 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 3314 bool Invert) { 3315 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3316 uint64_t ImmVal = CNode->getZExtValue(); 3317 SDLoc DL(N); 3318 3319 if (Invert) 3320 ImmVal = ~ImmVal; 3321 3322 // Shift mask depending on type size. 
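// For example, an i16 element value of 0x00ff is replicated below to
// 0x00ff00ff00ff00ff, which processLogicalImmediate can then encode as a
// valid 64-bit logical immediate.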
3323 switch (VT.SimpleTy) { 3324 case MVT::i8: 3325 ImmVal &= 0xFF; 3326 ImmVal |= ImmVal << 8; 3327 ImmVal |= ImmVal << 16; 3328 ImmVal |= ImmVal << 32; 3329 break; 3330 case MVT::i16: 3331 ImmVal &= 0xFFFF; 3332 ImmVal |= ImmVal << 16; 3333 ImmVal |= ImmVal << 32; 3334 break; 3335 case MVT::i32: 3336 ImmVal &= 0xFFFFFFFF; 3337 ImmVal |= ImmVal << 32; 3338 break; 3339 case MVT::i64: 3340 break; 3341 default: 3342 llvm_unreachable("Unexpected type"); 3343 } 3344 3345 uint64_t encoding; 3346 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3347 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3348 return true; 3349 } 3350 } 3351 return false; 3352 } 3353 3354 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 3355 // Rather than attempt to normalise everything we can sometimes saturate the 3356 // shift amount during selection. This function also allows for consistent 3357 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 3358 // required by the instructions. 3359 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 3360 uint64_t High, bool AllowSaturation, 3361 SDValue &Imm) { 3362 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3363 uint64_t ImmVal = CN->getZExtValue(); 3364 3365 // Reject shift amounts that are too small. 3366 if (ImmVal < Low) 3367 return false; 3368 3369 // Reject or saturate shift amounts that are too big. 3370 if (ImmVal > High) { 3371 if (!AllowSaturation) 3372 return false; 3373 ImmVal = High; 3374 } 3375 3376 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3377 return true; 3378 } 3379 3380 return false; 3381 } 3382 3383 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3384 // tagp(FrameIndex, IRGstack, tag_offset): 3385 // since the offset between FrameIndex and IRGstack is a compile-time 3386 // constant, this can be lowered to a single ADDG instruction. 3387 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3388 return false; 3389 } 3390 3391 SDValue IRG_SP = N->getOperand(2); 3392 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3393 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3394 Intrinsic::aarch64_irg_sp) { 3395 return false; 3396 } 3397 3398 const TargetLowering *TLI = getTargetLowering(); 3399 SDLoc DL(N); 3400 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3401 SDValue FiOp = CurDAG->getTargetFrameIndex( 3402 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3403 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3404 3405 SDNode *Out = CurDAG->getMachineNode( 3406 AArch64::TAGPstack, DL, MVT::i64, 3407 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3408 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3409 ReplaceNode(N, Out); 3410 return true; 3411 } 3412 3413 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3414 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3415 "llvm.aarch64.tagp third argument must be an immediate"); 3416 if (trySelectStackSlotTagP(N)) 3417 return; 3418 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3419 // compile-time constant, not just for stack allocations. 3420 3421 // General case for unrelated pointers in Op1 and Op2. 
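// Roughly (register names illustrative):
//   subp x8, <op1>, <op2>          // untagged address difference Op1 - Op2
//   add  x8, x8, <op2>             // Op1's address bits with Op2's tag
//   addg x0, x8, #0, #<tag_offset> // adjust the tag by the immediate offset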
3422 SDLoc DL(N);
3423 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3424 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
3425 {N->getOperand(1), N->getOperand(2)});
3426 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
3427 {SDValue(N1, 0), N->getOperand(2)});
3428 SDNode *N3 = CurDAG->getMachineNode(
3429 AArch64::ADDG, DL, MVT::i64,
3430 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
3431 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3432 ReplaceNode(N, N3);
3433 }
3434
3435 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
3436 // vector types larger than NEON don't have a matching SubRegIndex.
3437 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3438 assert(V.getValueType().isScalableVector() &&
3439 V.getValueType().getSizeInBits().getKnownMinSize() ==
3440 AArch64::SVEBitsPerBlock &&
3441 "Expected to extract from a packed scalable vector!");
3442 assert(VT.isFixedLengthVector() &&
3443 "Expected to extract a fixed length vector!");
3444
3445 SDLoc DL(V);
3446 switch (VT.getSizeInBits()) {
3447 case 64: {
3448 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3449 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3450 }
3451 case 128: {
3452 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3453 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3454 }
3455 default: {
3456 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3457 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3458 }
3459 }
3460 }
3461
3462 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
3463 // vector types larger than NEON don't have a matching SubRegIndex.
3464 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3465 assert(VT.isScalableVector() &&
3466 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
3467 "Expected to insert into a packed scalable vector!");
3468 assert(V.getValueType().isFixedLengthVector() &&
3469 "Expected to insert a fixed length vector!");
3470
3471 SDLoc DL(V);
3472 switch (V.getValueType().getSizeInBits()) {
3473 case 64: {
3474 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3475 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3476 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3477 SDValue(Container, 0), V, SubReg);
3478 }
3479 case 128: {
3480 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3481 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3482 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3483 SDValue(Container, 0), V, SubReg);
3484 }
3485 default: {
3486 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3487 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3488 }
3489 }
3490 }
3491
3492 void AArch64DAGToDAGISel::Select(SDNode *Node) {
3493 // If we have a custom node, we have already selected it!
3494 if (Node->isMachineOpcode()) {
3495 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
3496 Node->setNodeId(-1);
3497 return;
3498 }
3499
3500 // A few opcodes need custom selection; handle those below.
3501 EVT VT = Node->getValueType(0); 3502 3503 switch (Node->getOpcode()) { 3504 default: 3505 break; 3506 3507 case ISD::ATOMIC_CMP_SWAP: 3508 if (SelectCMP_SWAP(Node)) 3509 return; 3510 break; 3511 3512 case ISD::READ_REGISTER: 3513 if (tryReadRegister(Node)) 3514 return; 3515 break; 3516 3517 case ISD::WRITE_REGISTER: 3518 if (tryWriteRegister(Node)) 3519 return; 3520 break; 3521 3522 case ISD::ADD: 3523 if (tryMLAV64LaneV128(Node)) 3524 return; 3525 break; 3526 3527 case ISD::LOAD: { 3528 // Try to select as an indexed load. Fall through to normal processing 3529 // if we can't. 3530 if (tryIndexedLoad(Node)) 3531 return; 3532 break; 3533 } 3534 3535 case ISD::SRL: 3536 case ISD::AND: 3537 case ISD::SRA: 3538 case ISD::SIGN_EXTEND_INREG: 3539 if (tryBitfieldExtractOp(Node)) 3540 return; 3541 if (tryBitfieldInsertInZeroOp(Node)) 3542 return; 3543 LLVM_FALLTHROUGH; 3544 case ISD::ROTR: 3545 case ISD::SHL: 3546 if (tryShiftAmountMod(Node)) 3547 return; 3548 break; 3549 3550 case ISD::SIGN_EXTEND: 3551 if (tryBitfieldExtractOpFromSExt(Node)) 3552 return; 3553 break; 3554 3555 case ISD::FP_EXTEND: 3556 if (tryHighFPExt(Node)) 3557 return; 3558 break; 3559 3560 case ISD::OR: 3561 if (tryBitfieldInsertOp(Node)) 3562 return; 3563 break; 3564 3565 case ISD::EXTRACT_SUBVECTOR: { 3566 // Bail when not a "cast" like extract_subvector. 3567 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0) 3568 break; 3569 3570 // Bail when normal isel can do the job. 3571 EVT InVT = Node->getOperand(0).getValueType(); 3572 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 3573 break; 3574 3575 // NOTE: We can only get here when doing fixed length SVE code generation. 3576 // We do manual selection because the types involved are not linked to real 3577 // registers (despite being legal) and must be coerced into SVE registers. 3578 // 3579 // NOTE: If the above changes, be aware that selection will still not work 3580 // because the td definition of extract_vector does not support extracting 3581 // a fixed length vector from a scalable vector. 3582 3583 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 3584 return; 3585 } 3586 3587 case ISD::INSERT_SUBVECTOR: { 3588 // Bail when not a "cast" like insert_subvector. 3589 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 3590 break; 3591 if (!Node->getOperand(0).isUndef()) 3592 break; 3593 3594 // Bail when normal isel should do the job. 3595 EVT InVT = Node->getOperand(1).getValueType(); 3596 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 3597 break; 3598 3599 // NOTE: We can only get here when doing fixed length SVE code generation. 3600 // We do manual selection because the types involved are not linked to real 3601 // registers (despite being legal) and must be coerced into SVE registers. 3602 // 3603 // NOTE: If the above changes, be aware that selection will still not work 3604 // because the td definition of insert_vector does not support inserting a 3605 // fixed length vector into a scalable vector. 3606 3607 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 3608 return; 3609 } 3610 3611 case ISD::Constant: { 3612 // Materialize zero constants as copies from WZR/XZR. This allows 3613 // the coalescer to propagate these into other instructions. 
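// For example, an "i32 0" becomes a CopyFromReg of WZR rather than a
// move-immediate, so uses of the zero can be rewritten to read WZR
// directly.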
3614 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 3615 if (ConstNode->isZero()) { 3616 if (VT == MVT::i32) { 3617 SDValue New = CurDAG->getCopyFromReg( 3618 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 3619 ReplaceNode(Node, New.getNode()); 3620 return; 3621 } else if (VT == MVT::i64) { 3622 SDValue New = CurDAG->getCopyFromReg( 3623 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 3624 ReplaceNode(Node, New.getNode()); 3625 return; 3626 } 3627 } 3628 break; 3629 } 3630 3631 case ISD::FrameIndex: { 3632 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 3633 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 3634 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 3635 const TargetLowering *TLI = getTargetLowering(); 3636 SDValue TFI = CurDAG->getTargetFrameIndex( 3637 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3638 SDLoc DL(Node); 3639 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 3640 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 3641 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 3642 return; 3643 } 3644 case ISD::INTRINSIC_W_CHAIN: { 3645 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3646 switch (IntNo) { 3647 default: 3648 break; 3649 case Intrinsic::aarch64_ldaxp: 3650 case Intrinsic::aarch64_ldxp: { 3651 unsigned Op = 3652 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3653 SDValue MemAddr = Node->getOperand(2); 3654 SDLoc DL(Node); 3655 SDValue Chain = Node->getOperand(0); 3656 3657 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3658 MVT::Other, MemAddr, Chain); 3659 3660 // Transfer memoperands. 3661 MachineMemOperand *MemOp = 3662 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3663 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3664 ReplaceNode(Node, Ld); 3665 return; 3666 } 3667 case Intrinsic::aarch64_stlxp: 3668 case Intrinsic::aarch64_stxp: { 3669 unsigned Op = 3670 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 3671 SDLoc DL(Node); 3672 SDValue Chain = Node->getOperand(0); 3673 SDValue ValLo = Node->getOperand(2); 3674 SDValue ValHi = Node->getOperand(3); 3675 SDValue MemAddr = Node->getOperand(4); 3676 3677 // Place arguments in the right order. 3678 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3679 3680 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3681 // Transfer memoperands. 
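// Keeping the MachineMemOperand on the new node preserves the access's
// volatility and aliasing information for later machine passes.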
3682 MachineMemOperand *MemOp = 3683 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3684 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3685 3686 ReplaceNode(Node, St); 3687 return; 3688 } 3689 case Intrinsic::aarch64_neon_ld1x2: 3690 if (VT == MVT::v8i8) { 3691 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3692 return; 3693 } else if (VT == MVT::v16i8) { 3694 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3695 return; 3696 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3697 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3698 return; 3699 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3700 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3701 return; 3702 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3703 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3704 return; 3705 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3706 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3707 return; 3708 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3709 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3710 return; 3711 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3712 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3713 return; 3714 } 3715 break; 3716 case Intrinsic::aarch64_neon_ld1x3: 3717 if (VT == MVT::v8i8) { 3718 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3719 return; 3720 } else if (VT == MVT::v16i8) { 3721 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3722 return; 3723 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3724 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3725 return; 3726 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3727 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3728 return; 3729 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3730 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3731 return; 3732 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3733 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3734 return; 3735 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3736 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3737 return; 3738 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3739 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3740 return; 3741 } 3742 break; 3743 case Intrinsic::aarch64_neon_ld1x4: 3744 if (VT == MVT::v8i8) { 3745 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3746 return; 3747 } else if (VT == MVT::v16i8) { 3748 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3749 return; 3750 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3751 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3752 return; 3753 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3754 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3755 return; 3756 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3757 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3758 return; 3759 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3760 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 3761 return; 3762 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3763 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3764 return; 3765 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3766 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3767 return; 3768 } 3769 break; 3770 case 
Intrinsic::aarch64_neon_ld2: 3771 if (VT == MVT::v8i8) { 3772 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3773 return; 3774 } else if (VT == MVT::v16i8) { 3775 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3776 return; 3777 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3778 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3779 return; 3780 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3781 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3782 return; 3783 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3784 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3785 return; 3786 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3787 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3788 return; 3789 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3790 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3791 return; 3792 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3793 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3794 return; 3795 } 3796 break; 3797 case Intrinsic::aarch64_neon_ld3: 3798 if (VT == MVT::v8i8) { 3799 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3800 return; 3801 } else if (VT == MVT::v16i8) { 3802 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3803 return; 3804 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3805 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3806 return; 3807 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3808 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3809 return; 3810 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3811 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3812 return; 3813 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3814 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3815 return; 3816 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3817 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3818 return; 3819 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3820 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3821 return; 3822 } 3823 break; 3824 case Intrinsic::aarch64_neon_ld4: 3825 if (VT == MVT::v8i8) { 3826 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3827 return; 3828 } else if (VT == MVT::v16i8) { 3829 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3830 return; 3831 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3832 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3833 return; 3834 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3835 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3836 return; 3837 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3838 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3839 return; 3840 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3841 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3842 return; 3843 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3844 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3845 return; 3846 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3847 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3848 return; 3849 } 3850 break; 3851 case Intrinsic::aarch64_neon_ld2r: 3852 if (VT == MVT::v8i8) { 3853 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3854 return; 3855 } else if (VT == MVT::v16i8) { 3856 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 
3857 return; 3858 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3859 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3860 return; 3861 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3862 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3863 return; 3864 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3865 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3866 return; 3867 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3868 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3869 return; 3870 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3871 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3872 return; 3873 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3874 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3875 return; 3876 } 3877 break; 3878 case Intrinsic::aarch64_neon_ld3r: 3879 if (VT == MVT::v8i8) { 3880 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3881 return; 3882 } else if (VT == MVT::v16i8) { 3883 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3884 return; 3885 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3886 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3887 return; 3888 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3889 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3890 return; 3891 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3892 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3893 return; 3894 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3895 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3896 return; 3897 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3898 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3899 return; 3900 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3901 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3902 return; 3903 } 3904 break; 3905 case Intrinsic::aarch64_neon_ld4r: 3906 if (VT == MVT::v8i8) { 3907 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3908 return; 3909 } else if (VT == MVT::v16i8) { 3910 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3911 return; 3912 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3913 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3914 return; 3915 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3916 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3917 return; 3918 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3919 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3920 return; 3921 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3922 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3923 return; 3924 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3925 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3926 return; 3927 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3928 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3929 return; 3930 } 3931 break; 3932 case Intrinsic::aarch64_neon_ld2lane: 3933 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3934 SelectLoadLane(Node, 2, AArch64::LD2i8); 3935 return; 3936 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3937 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3938 SelectLoadLane(Node, 2, AArch64::LD2i16); 3939 return; 3940 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3941 VT == MVT::v2f32) { 3942 SelectLoadLane(Node, 2, AArch64::LD2i32); 3943 return; 3944 } else if (VT 
== MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3945 VT == MVT::v1f64) { 3946 SelectLoadLane(Node, 2, AArch64::LD2i64); 3947 return; 3948 } 3949 break; 3950 case Intrinsic::aarch64_neon_ld3lane: 3951 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3952 SelectLoadLane(Node, 3, AArch64::LD3i8); 3953 return; 3954 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3955 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3956 SelectLoadLane(Node, 3, AArch64::LD3i16); 3957 return; 3958 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3959 VT == MVT::v2f32) { 3960 SelectLoadLane(Node, 3, AArch64::LD3i32); 3961 return; 3962 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3963 VT == MVT::v1f64) { 3964 SelectLoadLane(Node, 3, AArch64::LD3i64); 3965 return; 3966 } 3967 break; 3968 case Intrinsic::aarch64_neon_ld4lane: 3969 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3970 SelectLoadLane(Node, 4, AArch64::LD4i8); 3971 return; 3972 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3973 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3974 SelectLoadLane(Node, 4, AArch64::LD4i16); 3975 return; 3976 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3977 VT == MVT::v2f32) { 3978 SelectLoadLane(Node, 4, AArch64::LD4i32); 3979 return; 3980 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3981 VT == MVT::v1f64) { 3982 SelectLoadLane(Node, 4, AArch64::LD4i64); 3983 return; 3984 } 3985 break; 3986 case Intrinsic::aarch64_ld64b: 3987 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 3988 return; 3989 case Intrinsic::aarch64_sve_ld2_sret: { 3990 if (VT == MVT::nxv16i8) { 3991 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, 3992 true); 3993 return; 3994 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 3995 VT == MVT::nxv8bf16) { 3996 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, 3997 true); 3998 return; 3999 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4000 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, 4001 true); 4002 return; 4003 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4004 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, 4005 true); 4006 return; 4007 } 4008 break; 4009 } 4010 case Intrinsic::aarch64_sve_ld3_sret: { 4011 if (VT == MVT::nxv16i8) { 4012 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, 4013 true); 4014 return; 4015 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4016 VT == MVT::nxv8bf16) { 4017 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, 4018 true); 4019 return; 4020 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4021 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, 4022 true); 4023 return; 4024 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4025 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, 4026 true); 4027 return; 4028 } 4029 break; 4030 } 4031 case Intrinsic::aarch64_sve_ld4_sret: { 4032 if (VT == MVT::nxv16i8) { 4033 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, 4034 true); 4035 return; 4036 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4037 VT == MVT::nxv8bf16) { 4038 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, 4039 true); 4040 return; 4041 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4042 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, 4043 
true); 4044 return; 4045 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4046 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, 4047 true); 4048 return; 4049 } 4050 break; 4051 } 4052 } 4053 } break; 4054 case ISD::INTRINSIC_WO_CHAIN: { 4055 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 4056 switch (IntNo) { 4057 default: 4058 break; 4059 case Intrinsic::aarch64_tagp: 4060 SelectTagP(Node); 4061 return; 4062 case Intrinsic::aarch64_neon_tbl2: 4063 SelectTable(Node, 2, 4064 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 4065 false); 4066 return; 4067 case Intrinsic::aarch64_neon_tbl3: 4068 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 4069 : AArch64::TBLv16i8Three, 4070 false); 4071 return; 4072 case Intrinsic::aarch64_neon_tbl4: 4073 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 4074 : AArch64::TBLv16i8Four, 4075 false); 4076 return; 4077 case Intrinsic::aarch64_neon_tbx2: 4078 SelectTable(Node, 2, 4079 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 4080 true); 4081 return; 4082 case Intrinsic::aarch64_neon_tbx3: 4083 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 4084 : AArch64::TBXv16i8Three, 4085 true); 4086 return; 4087 case Intrinsic::aarch64_neon_tbx4: 4088 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 4089 : AArch64::TBXv16i8Four, 4090 true); 4091 return; 4092 case Intrinsic::aarch64_neon_smull: 4093 case Intrinsic::aarch64_neon_umull: 4094 if (tryMULLV64LaneV128(IntNo, Node)) 4095 return; 4096 break; 4097 case Intrinsic::swift_async_context_addr: { 4098 SDLoc DL(Node); 4099 CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64, 4100 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, 4101 AArch64::FP, MVT::i64), 4102 CurDAG->getTargetConstant(8, DL, MVT::i32), 4103 CurDAG->getTargetConstant(0, DL, MVT::i32)); 4104 auto &MF = CurDAG->getMachineFunction(); 4105 MF.getFrameInfo().setFrameAddressIsTaken(true); 4106 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 4107 return; 4108 } 4109 } 4110 break; 4111 } 4112 case ISD::INTRINSIC_VOID: { 4113 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 4114 if (Node->getNumOperands() >= 3) 4115 VT = Node->getOperand(2)->getValueType(0); 4116 switch (IntNo) { 4117 default: 4118 break; 4119 case Intrinsic::aarch64_neon_st1x2: { 4120 if (VT == MVT::v8i8) { 4121 SelectStore(Node, 2, AArch64::ST1Twov8b); 4122 return; 4123 } else if (VT == MVT::v16i8) { 4124 SelectStore(Node, 2, AArch64::ST1Twov16b); 4125 return; 4126 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4127 VT == MVT::v4bf16) { 4128 SelectStore(Node, 2, AArch64::ST1Twov4h); 4129 return; 4130 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4131 VT == MVT::v8bf16) { 4132 SelectStore(Node, 2, AArch64::ST1Twov8h); 4133 return; 4134 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4135 SelectStore(Node, 2, AArch64::ST1Twov2s); 4136 return; 4137 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4138 SelectStore(Node, 2, AArch64::ST1Twov4s); 4139 return; 4140 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4141 SelectStore(Node, 2, AArch64::ST1Twov2d); 4142 return; 4143 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4144 SelectStore(Node, 2, AArch64::ST1Twov1d); 4145 return; 4146 } 4147 break; 4148 } 4149 case Intrinsic::aarch64_neon_st1x3: { 4150 if (VT == MVT::v8i8) { 4151 SelectStore(Node, 3, AArch64::ST1Threev8b); 4152 return; 4153 } else if (VT == MVT::v16i8) { 4154 SelectStore(Node, 3, 
AArch64::ST1Threev16b); 4155 return; 4156 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4157 VT == MVT::v4bf16) { 4158 SelectStore(Node, 3, AArch64::ST1Threev4h); 4159 return; 4160 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4161 VT == MVT::v8bf16) { 4162 SelectStore(Node, 3, AArch64::ST1Threev8h); 4163 return; 4164 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4165 SelectStore(Node, 3, AArch64::ST1Threev2s); 4166 return; 4167 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4168 SelectStore(Node, 3, AArch64::ST1Threev4s); 4169 return; 4170 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4171 SelectStore(Node, 3, AArch64::ST1Threev2d); 4172 return; 4173 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4174 SelectStore(Node, 3, AArch64::ST1Threev1d); 4175 return; 4176 } 4177 break; 4178 } 4179 case Intrinsic::aarch64_neon_st1x4: { 4180 if (VT == MVT::v8i8) { 4181 SelectStore(Node, 4, AArch64::ST1Fourv8b); 4182 return; 4183 } else if (VT == MVT::v16i8) { 4184 SelectStore(Node, 4, AArch64::ST1Fourv16b); 4185 return; 4186 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4187 VT == MVT::v4bf16) { 4188 SelectStore(Node, 4, AArch64::ST1Fourv4h); 4189 return; 4190 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4191 VT == MVT::v8bf16) { 4192 SelectStore(Node, 4, AArch64::ST1Fourv8h); 4193 return; 4194 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4195 SelectStore(Node, 4, AArch64::ST1Fourv2s); 4196 return; 4197 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4198 SelectStore(Node, 4, AArch64::ST1Fourv4s); 4199 return; 4200 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4201 SelectStore(Node, 4, AArch64::ST1Fourv2d); 4202 return; 4203 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4204 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4205 return; 4206 } 4207 break; 4208 } 4209 case Intrinsic::aarch64_neon_st2: { 4210 if (VT == MVT::v8i8) { 4211 SelectStore(Node, 2, AArch64::ST2Twov8b); 4212 return; 4213 } else if (VT == MVT::v16i8) { 4214 SelectStore(Node, 2, AArch64::ST2Twov16b); 4215 return; 4216 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4217 VT == MVT::v4bf16) { 4218 SelectStore(Node, 2, AArch64::ST2Twov4h); 4219 return; 4220 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4221 VT == MVT::v8bf16) { 4222 SelectStore(Node, 2, AArch64::ST2Twov8h); 4223 return; 4224 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4225 SelectStore(Node, 2, AArch64::ST2Twov2s); 4226 return; 4227 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4228 SelectStore(Node, 2, AArch64::ST2Twov4s); 4229 return; 4230 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4231 SelectStore(Node, 2, AArch64::ST2Twov2d); 4232 return; 4233 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4234 SelectStore(Node, 2, AArch64::ST1Twov1d); 4235 return; 4236 } 4237 break; 4238 } 4239 case Intrinsic::aarch64_neon_st3: { 4240 if (VT == MVT::v8i8) { 4241 SelectStore(Node, 3, AArch64::ST3Threev8b); 4242 return; 4243 } else if (VT == MVT::v16i8) { 4244 SelectStore(Node, 3, AArch64::ST3Threev16b); 4245 return; 4246 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4247 VT == MVT::v4bf16) { 4248 SelectStore(Node, 3, AArch64::ST3Threev4h); 4249 return; 4250 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4251 VT == MVT::v8bf16) { 4252 SelectStore(Node, 3, AArch64::ST3Threev8h); 4253 return; 4254 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4255 SelectStore(Node, 3, AArch64::ST3Threev2s); 4256 return; 4257 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4258 SelectStore(Node, 3, 
AArch64::ST3Threev4s); 4259 return; 4260 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4261 SelectStore(Node, 3, AArch64::ST3Threev2d); 4262 return; 4263 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4264 SelectStore(Node, 3, AArch64::ST1Threev1d); 4265 return; 4266 } 4267 break; 4268 } 4269 case Intrinsic::aarch64_neon_st4: { 4270 if (VT == MVT::v8i8) { 4271 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4272 return; 4273 } else if (VT == MVT::v16i8) { 4274 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4275 return; 4276 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4277 VT == MVT::v4bf16) { 4278 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4279 return; 4280 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4281 VT == MVT::v8bf16) { 4282 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4283 return; 4284 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4285 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4286 return; 4287 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4288 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4289 return; 4290 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4291 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4292 return; 4293 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4294 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4295 return; 4296 } 4297 break; 4298 } 4299 case Intrinsic::aarch64_neon_st2lane: { 4300 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4301 SelectStoreLane(Node, 2, AArch64::ST2i8); 4302 return; 4303 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4304 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4305 SelectStoreLane(Node, 2, AArch64::ST2i16); 4306 return; 4307 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4308 VT == MVT::v2f32) { 4309 SelectStoreLane(Node, 2, AArch64::ST2i32); 4310 return; 4311 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4312 VT == MVT::v1f64) { 4313 SelectStoreLane(Node, 2, AArch64::ST2i64); 4314 return; 4315 } 4316 break; 4317 } 4318 case Intrinsic::aarch64_neon_st3lane: { 4319 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4320 SelectStoreLane(Node, 3, AArch64::ST3i8); 4321 return; 4322 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4323 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4324 SelectStoreLane(Node, 3, AArch64::ST3i16); 4325 return; 4326 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4327 VT == MVT::v2f32) { 4328 SelectStoreLane(Node, 3, AArch64::ST3i32); 4329 return; 4330 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4331 VT == MVT::v1f64) { 4332 SelectStoreLane(Node, 3, AArch64::ST3i64); 4333 return; 4334 } 4335 break; 4336 } 4337 case Intrinsic::aarch64_neon_st4lane: { 4338 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4339 SelectStoreLane(Node, 4, AArch64::ST4i8); 4340 return; 4341 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4342 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4343 SelectStoreLane(Node, 4, AArch64::ST4i16); 4344 return; 4345 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4346 VT == MVT::v2f32) { 4347 SelectStoreLane(Node, 4, AArch64::ST4i32); 4348 return; 4349 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4350 VT == MVT::v1f64) { 4351 SelectStoreLane(Node, 4, AArch64::ST4i64); 4352 return; 4353 } 4354 break; 4355 } 4356 case Intrinsic::aarch64_sve_st2: { 4357 if (VT == MVT::nxv16i8) { 4358 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, 
AArch64::ST2B_IMM); 4359 return; 4360 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4361 VT == MVT::nxv8bf16) { 4362 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 4363 return; 4364 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4365 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 4366 return; 4367 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4368 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 4369 return; 4370 } 4371 break; 4372 } 4373 case Intrinsic::aarch64_sve_st3: { 4374 if (VT == MVT::nxv16i8) { 4375 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 4376 return; 4377 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4378 VT == MVT::nxv8bf16) { 4379 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 4380 return; 4381 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4382 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 4383 return; 4384 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4385 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 4386 return; 4387 } 4388 break; 4389 } 4390 case Intrinsic::aarch64_sve_st4: { 4391 if (VT == MVT::nxv16i8) { 4392 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 4393 return; 4394 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4395 VT == MVT::nxv8bf16) { 4396 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 4397 return; 4398 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4399 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 4400 return; 4401 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4402 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 4403 return; 4404 } 4405 break; 4406 } 4407 } 4408 break; 4409 } 4410 case AArch64ISD::LD2post: { 4411 if (VT == MVT::v8i8) { 4412 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 4413 return; 4414 } else if (VT == MVT::v16i8) { 4415 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 4416 return; 4417 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4418 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 4419 return; 4420 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4421 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 4422 return; 4423 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4424 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 4425 return; 4426 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4427 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 4428 return; 4429 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4430 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4431 return; 4432 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4433 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 4434 return; 4435 } 4436 break; 4437 } 4438 case AArch64ISD::LD3post: { 4439 if (VT == MVT::v8i8) { 4440 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 4441 return; 4442 } else if (VT == MVT::v16i8) { 4443 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 4444 return; 4445 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4446 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 4447 return; 4448 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4449 
SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 4450 return; 4451 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4452 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 4453 return; 4454 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4455 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 4456 return; 4457 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4458 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4459 return; 4460 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4461 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 4462 return; 4463 } 4464 break; 4465 } 4466 case AArch64ISD::LD4post: { 4467 if (VT == MVT::v8i8) { 4468 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 4469 return; 4470 } else if (VT == MVT::v16i8) { 4471 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 4472 return; 4473 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4474 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 4475 return; 4476 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4477 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 4478 return; 4479 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4480 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 4481 return; 4482 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4483 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 4484 return; 4485 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4486 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4487 return; 4488 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4489 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 4490 return; 4491 } 4492 break; 4493 } 4494 case AArch64ISD::LD1x2post: { 4495 if (VT == MVT::v8i8) { 4496 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 4497 return; 4498 } else if (VT == MVT::v16i8) { 4499 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 4500 return; 4501 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4502 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 4503 return; 4504 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4505 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 4506 return; 4507 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4508 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 4509 return; 4510 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4511 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 4512 return; 4513 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4514 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4515 return; 4516 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4517 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 4518 return; 4519 } 4520 break; 4521 } 4522 case AArch64ISD::LD1x3post: { 4523 if (VT == MVT::v8i8) { 4524 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 4525 return; 4526 } else if (VT == MVT::v16i8) { 4527 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 4528 return; 4529 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4530 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 4531 return; 4532 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4533 
SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 4534 return; 4535 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4536 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 4537 return; 4538 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4539 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 4540 return; 4541 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4542 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4543 return; 4544 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4545 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 4546 return; 4547 } 4548 break; 4549 } 4550 case AArch64ISD::LD1x4post: { 4551 if (VT == MVT::v8i8) { 4552 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 4553 return; 4554 } else if (VT == MVT::v16i8) { 4555 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 4556 return; 4557 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4558 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 4559 return; 4560 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4561 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 4562 return; 4563 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4564 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 4565 return; 4566 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4567 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 4568 return; 4569 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4570 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4571 return; 4572 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4573 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 4574 return; 4575 } 4576 break; 4577 } 4578 case AArch64ISD::LD1DUPpost: { 4579 if (VT == MVT::v8i8) { 4580 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 4581 return; 4582 } else if (VT == MVT::v16i8) { 4583 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 4584 return; 4585 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4586 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 4587 return; 4588 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4589 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 4590 return; 4591 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4592 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 4593 return; 4594 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4595 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 4596 return; 4597 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4598 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 4599 return; 4600 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4601 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 4602 return; 4603 } 4604 break; 4605 } 4606 case AArch64ISD::LD2DUPpost: { 4607 if (VT == MVT::v8i8) { 4608 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 4609 return; 4610 } else if (VT == MVT::v16i8) { 4611 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 4612 return; 4613 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4614 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 4615 return; 4616 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4617 SelectPostLoad(Node, 2, 
AArch64::LD2Rv8h_POST, AArch64::qsub0); 4618 return; 4619 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4620 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 4621 return; 4622 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4623 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 4624 return; 4625 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4626 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 4627 return; 4628 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4629 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 4630 return; 4631 } 4632 break; 4633 } 4634 case AArch64ISD::LD3DUPpost: { 4635 if (VT == MVT::v8i8) { 4636 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 4637 return; 4638 } else if (VT == MVT::v16i8) { 4639 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 4640 return; 4641 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4642 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 4643 return; 4644 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4645 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 4646 return; 4647 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4648 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 4649 return; 4650 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4651 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 4652 return; 4653 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4654 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 4655 return; 4656 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4657 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 4658 return; 4659 } 4660 break; 4661 } 4662 case AArch64ISD::LD4DUPpost: { 4663 if (VT == MVT::v8i8) { 4664 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 4665 return; 4666 } else if (VT == MVT::v16i8) { 4667 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 4668 return; 4669 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4670 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 4671 return; 4672 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4673 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 4674 return; 4675 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4676 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 4677 return; 4678 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4679 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 4680 return; 4681 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4682 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 4683 return; 4684 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4685 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 4686 return; 4687 } 4688 break; 4689 } 4690 case AArch64ISD::LD1LANEpost: { 4691 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4692 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 4693 return; 4694 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4695 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4696 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 4697 return; 4698 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4699 VT == MVT::v2f32) { 4700 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 4701 return; 4702 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == 
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
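  // For the post-increment stores the data type is taken from the stored
  // value (operand 1) rather than from the node's result list.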
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
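  // The ST1x{2,3,4}post nodes store two to four registers back-to-back
  // without the element interleaving performed by ST2/ST3/ST4; the
  // interleaving forms have no .1d variant, which is why the v1i64/v1f64
  // cases above also fall back to ST1 encodings.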
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
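  // SVE structured loads that zero inactive lanes. SelectPredicatedLoad
  // picks between the immediate-offset (*_IMM) and the register-offset
  // variant of the instruction.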
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
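/// For example, MVT::nxv4i1 yields MVT::nxv4i32 for NumVec == 1 and
/// MVT::nxv12i32 for NumVec == 3.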
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID)
    return EVT();

  const unsigned IntNo =
      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
  if (IntNo == Intrinsic::aarch64_sme_ldr ||
      IntNo == Intrinsic::aarch64_sme_str)
    return MVT::nxv16i8;

  if (IntNo != Intrinsic::aarch64_sve_prf)
    return EVT();

  // We are using an SVE prefetch intrinsic. Type must be inferred
  // from the width of the predicate.
  return getPackedVectorTypeFromPredicateType(
      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
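/// The selected immediate counts vector-length-scaled multiples of
/// sizeof(MemVT), i.e. it corresponds to the [Xn, #imm, mul vl] assembly
/// form of the SVE load/store instructions.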
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

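/// Select a base register plus a small unsigned tile-slice offset for SME
/// loads and stores. An address that is not an ADD is used directly as the
/// base with an offset of zero; an ADD is folded only when its constant
/// right-hand side fits in Scale bits (e.g. 0-3 when Scale == 2).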
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned Scale,
                                             SDValue &Base, SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD) {
    Base = N;
    Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned MaxSize = (1 << Scale) - 1;

    if (ImmOff < 0 || ImmOff > MaxSize)
      return false;

    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff, SDLoc(N), MVT::i64);
    return true;
  }

  return false;
}