1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/APSInt.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/SelectionDAG.h" 25 #include "llvm/CodeGen/SelectionDAGISel.h" 26 #include "llvm/CodeGen/TargetLowering.h" 27 #include "llvm/IR/CallingConv.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/DerivedTypes.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/IntrinsicsARM.h" 33 #include "llvm/IR/LLVMContext.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Target/TargetOptions.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "arm-isel" 42 43 static cl::opt<bool> 44 DisableShifterOp("disable-shifter-op", cl::Hidden, 45 cl::desc("Disable isel of shifter-op"), 46 cl::init(false)); 47 48 //===--------------------------------------------------------------------===// 49 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 50 /// instructions for SelectionDAG operations. 
///
namespace {

/// ARMDAGToDAGISel - Instruction selector for the ARM target.  Most of the
/// public Select* methods are addressing-mode "ComplexPattern" hooks invoked
/// by the generated matcher included below (ARMGenDAGISel.inc); they decompose
/// an address SDValue into the operands (base, offset, opcode-immediate) of a
/// particular ARM/Thumb addressing mode.
class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true if it is profitable to select an FP MLA/MLS for this use
  /// (see the definition for the VMLx hazard rationale).
  bool hasNoVMLxHazardUse(SDNode *N) const;
  /// Return true if folding this shift into a shifter operand is profitable
  /// on the current subtarget.
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  // Match a register-shifted-register shifter operand (A = base reg,
  // B = shift-amount reg, C = shift opcode immediate).
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  // Match an immediate-shifted-register shifter operand (A = base reg,
  // B = shift opcode+amount immediate).
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    // Only match when the shifted value has a single use, since folding it
    // would otherwise duplicate the shift.
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Decompose a target-constant condition code into a predicate immediate
  // plus the CPSR flags register, for CMOV-style pseudos.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Match a constant in [Min, Max] and return it as a target constant.
  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Immediate-encodability predicates used by the generated matcher: ARM
  // so_imm (rotated 8-bit), its complement, and the Thumb-2 equivalents.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// Copy the MachineMemOperands of Src onto Dst.
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  // Fixed-point <-> floating-point conversion selection helpers.
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (\p N * \p Scale) where \p N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  // Reject values that are not an exact multiple of the scale.
  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

/// Pre-isel DAG rewrite: reshape (add X, (and (srl Y, c1), c2)) so the
/// and/srl become a UBFX and the residual shift folds into the add's shifter
/// operand.  Requires v6t2 for UBFX.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // hasV6T2Ops() was checked above, so a Thumb target here is Thumb2.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // The AND may be on either side of the ADD; canonicalize it into N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, a left shifter operand of 1 / 2 is free but others are not.
      // e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros the mask must be contiguous ones.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
490 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 491 if (OptLevel == CodeGenOpt::None) 492 return true; 493 494 if (!Subtarget->hasVMLxHazards()) 495 return true; 496 497 if (!N->hasOneUse()) 498 return false; 499 500 SDNode *Use = *N->use_begin(); 501 if (Use->getOpcode() == ISD::CopyToReg) 502 return true; 503 if (Use->isMachineOpcode()) { 504 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 505 CurDAG->getSubtarget().getInstrInfo()); 506 507 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 508 if (MCID.mayStore()) 509 return true; 510 unsigned Opcode = MCID.getOpcode(); 511 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 512 return true; 513 // vmlx feeding into another vmlx. We actually want to unfold 514 // the use later in the MLxExpansion pass. e.g. 515 // vmla 516 // vmla (stall 8 cycles) 517 // 518 // vmul (5 cycles) 519 // vadd (5 cycles) 520 // vmla 521 // This adds up to about 18 - 19 cycles. 522 // 523 // vmla 524 // vmul (stall 4 cycles) 525 // vadd adds up to about 14 cycles. 526 return TII->isFpMLxInstruction(Opcode); 527 } 528 529 return false; 530 } 531 532 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 533 ARM_AM::ShiftOpc ShOpcVal, 534 unsigned ShAmt) { 535 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 536 return true; 537 if (Shift.hasOneUse()) 538 return true; 539 // R << 2 is free. 540 return ShOpcVal == ARM_AM::lsl && 541 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 542 } 543 544 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 545 unsigned MaxShift, 546 unsigned &PowerOfTwo, 547 SDValue &NewMulConst) const { 548 assert(N.getOpcode() == ISD::MUL); 549 assert(MaxShift > 0); 550 551 // If the multiply is used in more than one place then changing the constant 552 // will make other uses incorrect, so don't. 
553 if (!N.hasOneUse()) return false; 554 // Check if the multiply is by a constant 555 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 556 if (!MulConst) return false; 557 // If the constant is used in more than one place then modifying it will mean 558 // we need to materialize two constants instead of one, which is a bad idea. 559 if (!MulConst->hasOneUse()) return false; 560 unsigned MulConstVal = MulConst->getZExtValue(); 561 if (MulConstVal == 0) return false; 562 563 // Find the largest power of 2 that MulConstVal is a multiple of 564 PowerOfTwo = MaxShift; 565 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 566 --PowerOfTwo; 567 if (PowerOfTwo == 0) return false; 568 } 569 570 // Only optimise if the new cost is better 571 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 572 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 573 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 574 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 575 return NewCost < OldCost; 576 } 577 578 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 579 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 580 ReplaceUses(N, M); 581 } 582 583 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 584 SDValue &BaseReg, 585 SDValue &Opc, 586 bool CheckProfitability) { 587 if (DisableShifterOp) 588 return false; 589 590 // If N is a multiply-by-constant and it's profitable to extract a shift and 591 // use it in a shifted operand do so. 
592 if (N.getOpcode() == ISD::MUL) { 593 unsigned PowerOfTwo = 0; 594 SDValue NewMulConst; 595 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 596 HandleSDNode Handle(N); 597 SDLoc Loc(N); 598 replaceDAGValue(N.getOperand(1), NewMulConst); 599 BaseReg = Handle.getValue(); 600 Opc = CurDAG->getTargetConstant( 601 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 602 return true; 603 } 604 } 605 606 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 607 608 // Don't match base register only case. That is matched to a separate 609 // lower complexity pattern with explicit register operand. 610 if (ShOpcVal == ARM_AM::no_shift) return false; 611 612 BaseReg = N.getOperand(0); 613 unsigned ShImmVal = 0; 614 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 615 if (!RHS) return false; 616 ShImmVal = RHS->getZExtValue() & 31; 617 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 618 SDLoc(N), MVT::i32); 619 return true; 620 } 621 622 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 623 SDValue &BaseReg, 624 SDValue &ShReg, 625 SDValue &Opc, 626 bool CheckProfitability) { 627 if (DisableShifterOp) 628 return false; 629 630 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 631 632 // Don't match base register only case. That is matched to a separate 633 // lower complexity pattern with explicit register operand. 
634 if (ShOpcVal == ARM_AM::no_shift) return false; 635 636 BaseReg = N.getOperand(0); 637 unsigned ShImmVal = 0; 638 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 639 if (RHS) return false; 640 641 ShReg = N.getOperand(1); 642 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 643 return false; 644 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 645 SDLoc(N), MVT::i32); 646 return true; 647 } 648 649 // Determine whether an ISD::OR's operands are suitable to turn the operation 650 // into an addition, which often has more compact encodings. 651 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 652 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 653 Out = N; 654 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 655 } 656 657 658 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 659 SDValue &Base, 660 SDValue &OffImm) { 661 // Match simple R + imm12 operands. 662 663 // Base only. 664 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 665 !CurDAG->isBaseWithConstantOffset(N)) { 666 if (N.getOpcode() == ISD::FrameIndex) { 667 // Match frame index. 
668 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 669 Base = CurDAG->getTargetFrameIndex( 670 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 671 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 672 return true; 673 } 674 675 if (N.getOpcode() == ARMISD::Wrapper && 676 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 677 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 678 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 679 Base = N.getOperand(0); 680 } else 681 Base = N; 682 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 683 return true; 684 } 685 686 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 687 int RHSC = (int)RHS->getSExtValue(); 688 if (N.getOpcode() == ISD::SUB) 689 RHSC = -RHSC; 690 691 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 692 Base = N.getOperand(0); 693 if (Base.getOpcode() == ISD::FrameIndex) { 694 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 695 Base = CurDAG->getTargetFrameIndex( 696 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 697 } 698 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 699 return true; 700 } 701 } 702 703 // Base only. 704 Base = N; 705 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 706 return true; 707 } 708 709 710 711 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 712 SDValue &Opc) { 713 if (N.getOpcode() == ISD::MUL && 714 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 715 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 716 // X * [3,5,9] -> X + X * [2,4,8] etc. 
717 int RHSC = (int)RHS->getZExtValue(); 718 if (RHSC & 1) { 719 RHSC = RHSC & ~1; 720 ARM_AM::AddrOpc AddSub = ARM_AM::add; 721 if (RHSC < 0) { 722 AddSub = ARM_AM::sub; 723 RHSC = - RHSC; 724 } 725 if (isPowerOf2_32(RHSC)) { 726 unsigned ShAmt = Log2_32(RHSC); 727 Base = Offset = N.getOperand(0); 728 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 729 ARM_AM::lsl), 730 SDLoc(N), MVT::i32); 731 return true; 732 } 733 } 734 } 735 } 736 737 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 738 // ISD::OR that is equivalent to an ISD::ADD. 739 !CurDAG->isBaseWithConstantOffset(N)) 740 return false; 741 742 // Leave simple R +/- imm12 operands for LDRi12 743 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 744 int RHSC; 745 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 746 -0x1000+1, 0x1000, RHSC)) // 12 bits. 747 return false; 748 } 749 750 // Otherwise this is R +/- [possibly shifted] R. 751 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 752 ARM_AM::ShiftOpc ShOpcVal = 753 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 754 unsigned ShAmt = 0; 755 756 Base = N.getOperand(0); 757 Offset = N.getOperand(1); 758 759 if (ShOpcVal != ARM_AM::no_shift) { 760 // Check to see if the RHS of the shift is a constant, if not, we can't fold 761 // it. 762 if (ConstantSDNode *Sh = 763 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 764 ShAmt = Sh->getZExtValue(); 765 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 766 Offset = N.getOperand(1).getOperand(0); 767 else { 768 ShAmt = 0; 769 ShOpcVal = ARM_AM::no_shift; 770 } 771 } else { 772 ShOpcVal = ARM_AM::no_shift; 773 } 774 } 775 776 // Try matching (R shl C) + (R). 
777 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 778 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 779 N.getOperand(0).hasOneUse())) { 780 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 781 if (ShOpcVal != ARM_AM::no_shift) { 782 // Check to see if the RHS of the shift is a constant, if not, we can't 783 // fold it. 784 if (ConstantSDNode *Sh = 785 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 786 ShAmt = Sh->getZExtValue(); 787 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 788 Offset = N.getOperand(0).getOperand(0); 789 Base = N.getOperand(1); 790 } else { 791 ShAmt = 0; 792 ShOpcVal = ARM_AM::no_shift; 793 } 794 } else { 795 ShOpcVal = ARM_AM::no_shift; 796 } 797 } 798 } 799 800 // If Offset is a multiply-by-constant and it's profitable to extract a shift 801 // and use it in a shifted operand do so. 802 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 803 unsigned PowerOfTwo = 0; 804 SDValue NewMulConst; 805 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 806 HandleSDNode Handle(Offset); 807 replaceDAGValue(Offset.getOperand(1), NewMulConst); 808 Offset = Handle.getValue(); 809 ShAmt = PowerOfTwo; 810 ShOpcVal = ARM_AM::lsl; 811 } 812 } 813 814 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 815 SDLoc(N), MVT::i32); 816 return true; 817 } 818 819 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 820 SDValue &Offset, SDValue &Opc) { 821 unsigned Opcode = Op->getOpcode(); 822 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 823 ? cast<LoadSDNode>(Op)->getAddressingMode() 824 : cast<StoreSDNode>(Op)->getAddressingMode(); 825 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 826 ? 
ARM_AM::add : ARM_AM::sub; 827 int Val; 828 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 829 return false; 830 831 Offset = N; 832 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 833 unsigned ShAmt = 0; 834 if (ShOpcVal != ARM_AM::no_shift) { 835 // Check to see if the RHS of the shift is a constant, if not, we can't fold 836 // it. 837 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 838 ShAmt = Sh->getZExtValue(); 839 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 840 Offset = N.getOperand(0); 841 else { 842 ShAmt = 0; 843 ShOpcVal = ARM_AM::no_shift; 844 } 845 } else { 846 ShOpcVal = ARM_AM::no_shift; 847 } 848 } 849 850 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 851 SDLoc(N), MVT::i32); 852 return true; 853 } 854 855 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 856 SDValue &Offset, SDValue &Opc) { 857 unsigned Opcode = Op->getOpcode(); 858 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 859 ? cast<LoadSDNode>(Op)->getAddressingMode() 860 : cast<StoreSDNode>(Op)->getAddressingMode(); 861 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 862 ? ARM_AM::add : ARM_AM::sub; 863 int Val; 864 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 865 if (AddSub == ARM_AM::sub) Val *= -1; 866 Offset = CurDAG->getRegister(0, MVT::i32); 867 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 868 return true; 869 } 870 871 return false; 872 } 873 874 875 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 876 SDValue &Offset, SDValue &Opc) { 877 unsigned Opcode = Op->getOpcode(); 878 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 879 ? cast<LoadSDNode>(Op)->getAddressingMode() 880 : cast<StoreSDNode>(Op)->getAddressingMode(); 881 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 882 ? 
      ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

/// SelectAddrOffsetNone - Match any address as a base with no offset; this
/// always succeeds and is used for addressing modes with an implicit zero
/// offset.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

/// SelectAddrMode3 - Select a base register, offset register and AM3 opcode
/// word for an addrmode3 (halfword / signed byte / doubleword) load or
/// store: reg +/- reg, or reg +/- imm8.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No constant offset: use N itself as the base with a zero offset.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant offset out of imm8 range: fall back to reg + reg form.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

/// SelectAddrMode3Offset - Select the offset operand of a pre/post-indexed
/// addrmode3 load/store: either an 8-bit immediate or an offset register.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

/// IsAddressingMode5 - Shared implementation for addrmode5 (VFP load/store):
/// base register plus a scaled, signed 8-bit immediate. FP16 selects the
/// half-precision variant (scale 2 and the FP16 opcode-word encoding).
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Offset out of range or not a multiple of the scale: base only.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

/// SelectAddrMode5 - addrmode5 with 4-byte scaling (single/double precision).
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

/// SelectAddrMode5FP16 - addrmode5 with 2-byte scaling (half precision).
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

/// SelectAddrMode6 - Select the address and alignment operands for a NEON
/// vector load/store (addrmode6). Always succeeds; the work is computing the
/// alignment operand.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

/// SelectAddrMode6Offset - Select the post-increment offset of an addrmode6
/// load/store. An increment equal to the access size is represented by
/// register 0 (the fixed-size writeback form); otherwise the increment
/// register itself is used.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

/// SelectAddrModePC - Match a PIC_ADD node (pc-relative address) and split
/// it into its offset value and constant-pool label id.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

/// SelectThumbAddrModeRRSext - Match a reg+reg Thumb address (also accepts
/// a zero constant, using it as both base and offset).
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

/// SelectThumbAddrModeRR - Same as the Sext variant, but defers to the
/// register-immediate form when the offset is a small negative constant.
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

/// SelectThumbAddrModeImm5S - Match a Thumb base + scaled unsigned 5-bit
/// immediate address (imm5 * Scale); returns false when a register-offset
/// form should be used instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

/// SelectThumbAddrModeImm5S4 - imm5 scaled by 4 (word loads/stores).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

/// SelectThumbAddrModeImm5S2 - imm5 scaled by 2 (halfword loads/stores).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

/// SelectThumbAddrModeImm5S1 - imm5 unscaled (byte loads/stores).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

/// SelectThumbAddrModeSP - Match an SP-relative (frame index) address with
/// an optional unsigned imm8*4 offset. May raise the frame object's
/// alignment to 4 so the offset stays a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

/// SelectTAddrModeImm7 - Match base +/- (imm7 << Shift); always succeeds,
/// falling back to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}


//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


/// SelectT2AddrModeImm12 - Match base + unsigned imm12 for t2LDRi12-style
/// instructions; always succeeds, falling back to base-only. Negative small
/// offsets are deliberately left to the imm8 (t2LDRi8) form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// SelectT2AddrModeImm8 (template) - Match base +/- (imm8 << Shift); always
/// succeeds, falling back to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// SelectT2AddrModeImm8 - Match base with a strictly negative imm8 offset
/// (the t2LDRi8 form); positive offsets are left to SelectT2AddrModeImm12.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectT2AddrModeImm8Offset - Select the imm8 offset of a pre/post-indexed
/// Thumb2 load/store; the sign is taken from the addressing mode (negated
/// for the decrement forms).
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

/// SelectT2AddrModeImm7 - Match base +/- (imm7 << Shift) for MVE-style
/// addressing; always succeeds, falling back to base-only.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// Template wrapper forwarding the compile-time Shift to the runtime
/// overload below.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

/// SelectT2AddrModeImm7Offset - Select the (imm7 << Shift) offset of a
/// pre/post-indexed (possibly masked) load/store; the sign follows the
/// addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

/// SelectImmediateInRange - Match a constant in [Min, Max] and return it as
/// a target constant.
template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

/// SelectT2AddrModeSoReg - Match (R + R) or (R + (R << [1,2,3])) for Thumb2
/// register-offset loads/stores. Constant offsets are deliberately rejected
/// so the imm12/imm8 forms get first pick.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shift amounts 0-3 are encodable in the soreg form.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // replaceDAGValue mutates the DAG; the HandleSDNode keeps a safe
      // reference to OffReg across the update.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

/// SelectT2AddrModeExclusive - Match base + imm8*4 for ldrex/strex. Always
/// succeeds (falling back to base-only), since those instructions have no
/// alternative addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The instruction encodes the offset in words.
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// transferMemOperands - Copy the memory operand from the original memory
/// node onto the newly created machine node.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// tryARMIndexedLoad - Try to select a pre/post-indexed ARM-mode load.
/// Returns true and replaces N on success. Immediate-offset forms are tried
/// before register-offset forms.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // i16 uses addrmode3; pick the sign- or zero-extending opcode.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads only exist in addrmode3.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The PRE_IMM forms take no separate offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

/// tryT1IndexedLoad - Try to select a Thumb1 post-incremented i32 load;
/// only an increment of exactly 4 (the access size) is supported.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

/// tryT2IndexedLoad - Try to select a pre/post-indexed Thumb2 load using the
/// imm8 offset form; the opcode is chosen from the loaded type and extension
/// kind.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// tryMVEIndexedLoad - Try to select a pre/post-indexed MVE vector load
/// (plain or masked) using the imm7 offset forms.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Unmasked load: unpredicated instruction.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Masked load: predicate the instruction on the mask.
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // The machine node's results are (writeback i32, value, chain) while the
  // load's are (value, writeback, chain), so results 0 and 1 swap here.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1857 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1858 SDLoc dl(V0.getNode()); 1859 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1860 MVT::i32); 1861 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1862 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1863 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1864 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1865 } 1866 1867 /// Form 4 consecutive D registers from a pair of Q registers. 1868 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1869 SDLoc dl(V0.getNode()); 1870 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1871 MVT::i32); 1872 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1873 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1874 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1875 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1876 } 1877 1878 /// Form 4 consecutive S registers. 1879 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1880 SDValue V2, SDValue V3) { 1881 SDLoc dl(V0.getNode()); 1882 SDValue RegClass = 1883 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1884 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1885 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1886 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1887 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1888 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1889 V2, SubReg2, V3, SubReg3 }; 1890 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1891 } 1892 1893 /// Form 4 consecutive D registers. 
1894 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1895 SDValue V2, SDValue V3) { 1896 SDLoc dl(V0.getNode()); 1897 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1898 MVT::i32); 1899 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1900 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1901 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1902 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1903 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1904 V2, SubReg2, V3, SubReg3 }; 1905 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1906 } 1907 1908 /// Form 4 consecutive Q registers. 1909 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1910 SDValue V2, SDValue V3) { 1911 SDLoc dl(V0.getNode()); 1912 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1913 MVT::i32); 1914 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1915 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1916 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1917 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1918 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1919 V2, SubReg2, V3, SubReg3 }; 1920 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1921 } 1922 1923 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1924 /// of a NEON VLD or VST instruction. The supported values depend on the 1925 /// number of registers being loaded. 
1926 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1927 unsigned NumVecs, bool is64BitVector) { 1928 unsigned NumRegs = NumVecs; 1929 if (!is64BitVector && NumVecs < 3) 1930 NumRegs *= 2; 1931 1932 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1933 if (Alignment >= 32 && NumRegs == 4) 1934 Alignment = 32; 1935 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1936 Alignment = 16; 1937 else if (Alignment >= 8) 1938 Alignment = 8; 1939 else 1940 Alignment = 0; 1941 1942 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1943 } 1944 1945 static bool isVLDfixed(unsigned Opc) 1946 { 1947 switch (Opc) { 1948 default: return false; 1949 case ARM::VLD1d8wb_fixed : return true; 1950 case ARM::VLD1d16wb_fixed : return true; 1951 case ARM::VLD1d64Qwb_fixed : return true; 1952 case ARM::VLD1d32wb_fixed : return true; 1953 case ARM::VLD1d64wb_fixed : return true; 1954 case ARM::VLD1d8TPseudoWB_fixed : return true; 1955 case ARM::VLD1d16TPseudoWB_fixed : return true; 1956 case ARM::VLD1d32TPseudoWB_fixed : return true; 1957 case ARM::VLD1d64TPseudoWB_fixed : return true; 1958 case ARM::VLD1d8QPseudoWB_fixed : return true; 1959 case ARM::VLD1d16QPseudoWB_fixed : return true; 1960 case ARM::VLD1d32QPseudoWB_fixed : return true; 1961 case ARM::VLD1d64QPseudoWB_fixed : return true; 1962 case ARM::VLD1q8wb_fixed : return true; 1963 case ARM::VLD1q16wb_fixed : return true; 1964 case ARM::VLD1q32wb_fixed : return true; 1965 case ARM::VLD1q64wb_fixed : return true; 1966 case ARM::VLD1DUPd8wb_fixed : return true; 1967 case ARM::VLD1DUPd16wb_fixed : return true; 1968 case ARM::VLD1DUPd32wb_fixed : return true; 1969 case ARM::VLD1DUPq8wb_fixed : return true; 1970 case ARM::VLD1DUPq16wb_fixed : return true; 1971 case ARM::VLD1DUPq32wb_fixed : return true; 1972 case ARM::VLD2d8wb_fixed : return true; 1973 case ARM::VLD2d16wb_fixed : return true; 1974 case ARM::VLD2d32wb_fixed : return true; 1975 case ARM::VLD2q8PseudoWB_fixed : 
return true; 1976 case ARM::VLD2q16PseudoWB_fixed : return true; 1977 case ARM::VLD2q32PseudoWB_fixed : return true; 1978 case ARM::VLD2DUPd8wb_fixed : return true; 1979 case ARM::VLD2DUPd16wb_fixed : return true; 1980 case ARM::VLD2DUPd32wb_fixed : return true; 1981 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true; 1982 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true; 1983 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true; 1984 } 1985 } 1986 1987 static bool isVSTfixed(unsigned Opc) 1988 { 1989 switch (Opc) { 1990 default: return false; 1991 case ARM::VST1d8wb_fixed : return true; 1992 case ARM::VST1d16wb_fixed : return true; 1993 case ARM::VST1d32wb_fixed : return true; 1994 case ARM::VST1d64wb_fixed : return true; 1995 case ARM::VST1q8wb_fixed : return true; 1996 case ARM::VST1q16wb_fixed : return true; 1997 case ARM::VST1q32wb_fixed : return true; 1998 case ARM::VST1q64wb_fixed : return true; 1999 case ARM::VST1d8TPseudoWB_fixed : return true; 2000 case ARM::VST1d16TPseudoWB_fixed : return true; 2001 case ARM::VST1d32TPseudoWB_fixed : return true; 2002 case ARM::VST1d64TPseudoWB_fixed : return true; 2003 case ARM::VST1d8QPseudoWB_fixed : return true; 2004 case ARM::VST1d16QPseudoWB_fixed : return true; 2005 case ARM::VST1d32QPseudoWB_fixed : return true; 2006 case ARM::VST1d64QPseudoWB_fixed : return true; 2007 case ARM::VST2d8wb_fixed : return true; 2008 case ARM::VST2d16wb_fixed : return true; 2009 case ARM::VST2d32wb_fixed : return true; 2010 case ARM::VST2q8PseudoWB_fixed : return true; 2011 case ARM::VST2q16PseudoWB_fixed : return true; 2012 case ARM::VST2q32PseudoWB_fixed : return true; 2013 } 2014 } 2015 2016 // Get the register stride update opcode of a VLD/VST instruction that 2017 // is otherwise equivalent to the given fixed stride updating instruction. 
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

/// Select a NEON structured load (vld1/2/3/4, plain or updating) of NumVecs
/// vectors. DOpcodes index by element size for D-register forms, QOpcodes0/1
/// for the Q-register forms (QOpcodes1 is the second, odd-registers half of a
/// vld3/vld4 of quad vectors).
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ?
2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector loads produce one wide i64-vector super-register result;
  // a 3-vector list is rounded up to 4 elements.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Select a NEON structured store (vst1/2/3/4, plain or updating) of NumVecs
/// vectors; table parameters mirror SelectVLD.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

/// Select a NEON single-lane load or store (vld2/3/4lane, vst2/3/4lane),
/// optionally with address writeback.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to what the lane access can actually use
  // (3-element lists take no alignment operand).
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    // Loads produce one wide i64-vector super-register; a 3-vector list is
    // rounded up to 4 elements.
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Gather the input vectors into one super-register operand.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Append a "Then" (VPT-predicated) MVE predicate: the VCC code constant
/// followed by the predicate mask operand.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
}

/// As above, additionally appending the value taken by inactive lanes.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(Inactive);
}

/// Append an unpredicated ("None") MVE predicate: VCC code plus a null
/// predicate register.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
}

/// As above, additionally appending an IMPLICIT_DEF of InactiveTy for the
/// inactive-lanes operand.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

/// Select an MVE gather/scatter-style writeback node; Opcodes holds the
/// 32- and 64-bit-element variants.
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
2560 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2561 case 32: 2562 Opcode = Opcodes[0]; 2563 break; 2564 case 64: 2565 Opcode = Opcodes[1]; 2566 break; 2567 default: 2568 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2569 } 2570 2571 Ops.push_back(N->getOperand(2)); // vector of base addresses 2572 2573 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2574 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2575 2576 if (Predicated) 2577 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2578 else 2579 AddEmptyMVEPredicateToOps(Ops, Loc); 2580 2581 Ops.push_back(N->getOperand(0)); // chain 2582 2583 SmallVector<EVT, 8> VTs; 2584 VTs.push_back(N->getValueType(1)); 2585 VTs.push_back(N->getValueType(0)); 2586 VTs.push_back(N->getValueType(2)); 2587 2588 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2589 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2590 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2591 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2592 transferMemOperands(N, New); 2593 CurDAG->RemoveDeadNode(N); 2594 } 2595 2596 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2597 bool Immediate, 2598 bool HasSaturationOperand) { 2599 SDLoc Loc(N); 2600 SmallVector<SDValue, 8> Ops; 2601 2602 // Two 32-bit halves of the value to be shifted 2603 Ops.push_back(N->getOperand(1)); 2604 Ops.push_back(N->getOperand(2)); 2605 2606 // The shift count 2607 if (Immediate) { 2608 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2609 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2610 } else { 2611 Ops.push_back(N->getOperand(3)); 2612 } 2613 2614 // The immediate saturation operand, if any 2615 if (HasSaturationOperand) { 2616 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); 2617 int SatBit = (SatOp == 64 ? 
0 : 1); 2618 Ops.push_back(getI32Imm(SatBit, Loc)); 2619 } 2620 2621 // MVE scalar shifts are IT-predicable, so include the standard 2622 // predicate arguments. 2623 Ops.push_back(getAL(CurDAG, Loc)); 2624 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2625 2626 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2627 } 2628 2629 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2630 uint16_t OpcodeWithNoCarry, 2631 bool Add, bool Predicated) { 2632 SDLoc Loc(N); 2633 SmallVector<SDValue, 8> Ops; 2634 uint16_t Opcode; 2635 2636 unsigned FirstInputOp = Predicated ? 2 : 1; 2637 2638 // Two input vectors and the input carry flag 2639 Ops.push_back(N->getOperand(FirstInputOp)); 2640 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2641 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2642 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2643 uint32_t CarryMask = 1 << 29; 2644 uint32_t CarryExpected = Add ? 0 : CarryMask; 2645 if (CarryInConstant && 2646 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2647 Opcode = OpcodeWithNoCarry; 2648 } else { 2649 Ops.push_back(CarryIn); 2650 Opcode = OpcodeWithCarry; 2651 } 2652 2653 if (Predicated) 2654 AddMVEPredicateToOps(Ops, Loc, 2655 N->getOperand(FirstInputOp + 3), // predicate 2656 N->getOperand(FirstInputOp - 1)); // inactive 2657 else 2658 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2659 2660 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2661 } 2662 2663 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2664 SDLoc Loc(N); 2665 SmallVector<SDValue, 8> Ops; 2666 2667 // One vector input, followed by a 32-bit word of bits to shift in 2668 // and then an immediate shift count 2669 Ops.push_back(N->getOperand(1)); 2670 Ops.push_back(N->getOperand(2)); 2671 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2672 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate 
shift count 2673 2674 if (Predicated) 2675 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2676 else 2677 AddEmptyMVEPredicateToOps(Ops, Loc); 2678 2679 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops)); 2680 } 2681 2682 static bool SDValueToConstBool(SDValue SDVal) { 2683 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2684 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2685 uint64_t Value = SDValConstant->getZExtValue(); 2686 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2687 return Value; 2688 } 2689 2690 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2691 const uint16_t *OpcodesS, 2692 const uint16_t *OpcodesU, 2693 size_t Stride, size_t TySize) { 2694 assert(TySize < Stride && "Invalid TySize"); 2695 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2696 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2697 bool IsExchange = SDValueToConstBool(N->getOperand(3)); 2698 if (IsUnsigned) { 2699 assert(!IsSub && 2700 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2701 assert(!IsExchange && 2702 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2703 } 2704 2705 auto OpIsZero = [N](size_t OpNo) { 2706 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo))) 2707 if (OpConst->getZExtValue() == 0) 2708 return true; 2709 return false; 2710 }; 2711 2712 // If the input accumulator value is not zero, select an instruction with 2713 // accumulator, otherwise select an instruction without accumulator 2714 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2715 2716 const uint16_t *Opcodes = IsUnsigned ? 
      OpcodesU : OpcodesS;
  // Walk the opcode table: the subtract, exchange and accumulate flags each
  // step over later groups of Stride entries, and TySize then picks the
  // element-size variant within the selected group.
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

/// Select an MVE VMLLDAV family intrinsic, dispatching to the base routine on
/// the 16- or 32-bit element size of the first vector operand.
void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

/// Select an MVE VRMLLDAVH family intrinsic; these exist only for 32-bit
/// vector elements, so no element-size dispatch is needed.
void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

/// Select an MVE structured load (VLD2/VLD4): emitted as NumVecs chained
/// "stage" instructions that each refine the same wide tuple register; only
/// the final stage may perform the pointer writeback.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Pick the per-stage opcode table for this element size.
  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // Every stage produces the whole tuple of NumVecs q-registers, modelled as
  // one wide vector of i64.
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  // Replace N's results: one q-register per vector, then the updated pointer
  // (when writing back), then the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

/// Select an MVE VxDUP-family intrinsic (vector of incrementing/decrementing
/// values): a base operand, an optional wrap limit, and an immediate step.
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++); // step
  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

/// Select a dual-register CDE instruction. The instruction's two 32-bit
/// results are modelled as a GPR pair in one untyped register, which is
/// split back into the intrinsic's two outputs below.
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    // The register pair is (low, high) in little-endian order.
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

/// Select a NEON vld-dup (load one element and duplicate it to every lane of
/// NumVecs vectors), optionally with address-register update.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to what the access covers. NumVecs == 3 is left at
  // zero — presumably the vld3-dup form cannot encode an alignment; verify
  // against the instruction definitions.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                   OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The result is modelled as one wide i64 vector; vld3 rounds up to 4 parts.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Chain = SDValue(VLdA, 1);
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Try to lower a pair of adjacent INSERT_VECTOR_ELTs of a v8i16/v8f16
/// vector with VMOV/VMOVX/VINS instead of individual lane moves.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors.
  // Check that we have two adjacent inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The two inserts must target an even/odd lane pair (Lane2 even, Lane1 the
  // odd lane directly above it).
  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
    // extracting odd lanes.
    if (VT == MVT::v8i16) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}

/// Try to fold an FMUL by a power-of-two splat constant, together with its
/// adjacent int<->float conversion, into a single MVE fixed-point VCVT.
/// FixedToFloat selects the conversion direction; returns true on success.
bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Look through a bitcast of the immediate, as long as the element size
  // is unchanged on both sides.
  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Recover the splatted floating-point constant from the various forms a
  // vector immediate can take.
  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ?
APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3201 APInt(ScalarBits, Imm)); 3202 break; 3203 } 3204 case ARMISD::VMOVFPIMM: { 3205 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3206 break; 3207 } 3208 default: 3209 return false; 3210 } 3211 3212 // Where n is the number of fractional bits, multiplying by 2^n will convert 3213 // from float to fixed and multiplying by 2^-n will convert from fixed to 3214 // float. Taking log2 of the factor (after taking the inverse in the case of 3215 // float to fixed) will give n. 3216 APFloat ToConvert = ImmAPF; 3217 if (FixedToFloat) { 3218 if (!ImmAPF.getExactInverse(&ToConvert)) 3219 return false; 3220 } 3221 APSInt Converted(64, 0); 3222 bool IsExact; 3223 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3224 &IsExact); 3225 if (!IsExact || !Converted.isPowerOf2()) 3226 return false; 3227 3228 unsigned FracBits = Converted.logBase2(); 3229 if (FracBits > ScalarBits) 3230 return false; 3231 3232 SmallVector<SDValue, 3> Ops{ 3233 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3234 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3235 3236 unsigned int Opcode; 3237 switch (ScalarBits) { 3238 case 16: 3239 if (FixedToFloat) 3240 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3241 else 3242 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3243 break; 3244 case 32: 3245 if (FixedToFloat) 3246 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3247 else 3248 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3249 break; 3250 default: 3251 llvm_unreachable("unexpected number of scalar bits"); 3252 break; 3253 } 3254 3255 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); 3256 return true; 3257 } 3258 3259 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { 3260 // Transform a floating-point to fixed-point conversion to a VCVT 3261 if (!Subtarget->hasMVEFloatOps()) 3262 return false; 3263 EVT Type = N->getValueType(0); 3264 if (!Type.isVector()) 3265 return false; 3266 unsigned int ScalarBits = Type.getScalarSizeInBits(); 3267 3268 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT; 3269 SDNode *Node = N->getOperand(0).getNode(); 3270 3271 // floating-point to fixed-point with one fractional bit gets turned into an 3272 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) 3273 if (Node->getOpcode() == ISD::FADD) { 3274 if (Node->getOperand(0) != Node->getOperand(1)) 3275 return false; 3276 SDNodeFlags Flags = Node->getFlags(); 3277 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3278 // allowed in 16 bit unsigned floats 3279 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) 3280 return false; 3281 3282 unsigned Opcode; 3283 switch (ScalarBits) { 3284 case 16: 3285 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3286 break; 3287 case 32: 3288 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3289 break; 3290 } 3291 SmallVector<SDValue, 3> Ops{Node->getOperand(0), 3292 CurDAG->getConstant(1, dl, MVT::i32)}; 3293 AddEmptyMVEPredicateToOps(Ops, dl, Type); 3294 3295 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); 3296 return true; 3297 } 3298 3299 if (Node->getOpcode() != ISD::FMUL) 3300 return false; 3301 3302 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); 3303 } 3304 3305 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { 3306 // Transform a fixed-point to floating-point conversion to a VCVT 3307 if (!Subtarget->hasMVEFloatOps()) 3308 return false; 3309 auto Type = N->getValueType(0); 3310 if (!Type.isVector()) 3311 return false; 3312 3313 auto LHS = N->getOperand(0); 3314 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) 3315 return false; 3316 3317 return transformFixedFloatingPointConversion( 3318 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); 3319 } 3320 3321 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3322 if (!Subtarget->hasV6T2Ops()) 3323 return false; 3324 3325 unsigned Opc = isSigned 3326 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3327 : (Subtarget->isThumb() ? 
         ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Finally, a sign-extend-in-register of a right shift is an SBFX/UBFX too.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0,  X, -X
///   select_cc setgt    X, -1,  X, -X
///   select_cc setl[te] X,  0, -X,  X
///   select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  // Require exactly xor(add(X, sra(X, size-1)), sra(X, size-1)).
  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

/// If A contains exactly one contiguous run of set bits, return the bit
/// indices of its (most significant, least significant) set bits; otherwise
/// return an empty Optional.
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

/// Lower (cmpz (and X, C), #0) into flag-setting shifts when C is a
/// contiguous bit mask. Sets SwitchEQNEToPLMI when the caller must rewrite
/// EQ/NE conditions into PL/MI (single-bit masks shifted into the sign bit).
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting shift: the t2 form in Thumb-2, otherwise the Thumb-1
  // form which implicitly defines CPSR.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}

/// Main instruction-selection entry point: dispatch N by opcode to the
/// specialised handlers above, falling through to the tablegen matcher.
void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise. (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
3626 StoreSDNode *ST = cast<StoreSDNode>(N); 3627 SDValue Ptr = ST->getBasePtr(); 3628 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3629 int RHSC = 0; 3630 if (Ptr.getOpcode() == ISD::ADD && 3631 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3632 Ptr = Ptr.getOperand(0); 3633 3634 if (Ptr.getOpcode() == ISD::CopyFromReg && 3635 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3636 Ptr.getOperand(0) == ST->getChain()) { 3637 SDValue Ops[] = {ST->getValue(), 3638 CurDAG->getRegister(ARM::SP, MVT::i32), 3639 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3640 getAL(CurDAG, dl), 3641 CurDAG->getRegister(0, MVT::i32), 3642 ST->getChain()}; 3643 MachineSDNode *ResNode = 3644 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3645 MachineMemOperand *MemOp = ST->getMemOperand(); 3646 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3647 ReplaceNode(N, ResNode); 3648 return; 3649 } 3650 } 3651 break; 3652 } 3653 case ISD::WRITE_REGISTER: 3654 if (tryWriteRegister(N)) 3655 return; 3656 break; 3657 case ISD::READ_REGISTER: 3658 if (tryReadRegister(N)) 3659 return; 3660 break; 3661 case ISD::INLINEASM: 3662 case ISD::INLINEASM_BR: 3663 if (tryInlineAsm(N)) 3664 return; 3665 break; 3666 case ISD::XOR: 3667 // Select special operations if XOR node forms integer ABS pattern 3668 if (tryABSOp(N)) 3669 return; 3670 // Other cases are autogenerated. 
3671 break; 3672 case ISD::Constant: { 3673 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3674 // If we can't materialize the constant we need to use a literal pool 3675 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3676 SDValue CPIdx = CurDAG->getTargetConstantPool( 3677 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3678 TLI->getPointerTy(CurDAG->getDataLayout())); 3679 3680 SDNode *ResNode; 3681 if (Subtarget->isThumb()) { 3682 SDValue Ops[] = { 3683 CPIdx, 3684 getAL(CurDAG, dl), 3685 CurDAG->getRegister(0, MVT::i32), 3686 CurDAG->getEntryNode() 3687 }; 3688 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3689 Ops); 3690 } else { 3691 SDValue Ops[] = { 3692 CPIdx, 3693 CurDAG->getTargetConstant(0, dl, MVT::i32), 3694 getAL(CurDAG, dl), 3695 CurDAG->getRegister(0, MVT::i32), 3696 CurDAG->getEntryNode() 3697 }; 3698 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3699 Ops); 3700 } 3701 // Annotate the Node with memory operand information so that MachineInstr 3702 // queries work properly. This e.g. gives the register allocation the 3703 // required information for rematerialization. 3704 MachineFunction& MF = CurDAG->getMachineFunction(); 3705 MachineMemOperand *MemOp = 3706 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3707 MachineMemOperand::MOLoad, 4, Align(4)); 3708 3709 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3710 3711 ReplaceNode(N, ResNode); 3712 return; 3713 } 3714 3715 // Other cases are autogenerated. 3716 break; 3717 } 3718 case ISD::FrameIndex: { 3719 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3720 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3721 SDValue TFI = CurDAG->getTargetFrameIndex( 3722 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3723 if (Subtarget->isThumb1Only()) { 3724 // Set the alignment of the frame object to 4, to avoid having to generate 3725 // more than one ADD 3726 MachineFrameInfo &MFI = MF->getFrameInfo(); 3727 if (MFI.getObjectAlign(FI) < Align(4)) 3728 MFI.setObjectAlignment(FI, Align(4)); 3729 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3730 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3731 return; 3732 } else { 3733 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3734 ARM::t2ADDri : ARM::ADDri); 3735 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3736 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3737 CurDAG->getRegister(0, MVT::i32) }; 3738 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3739 return; 3740 } 3741 } 3742 case ISD::INSERT_VECTOR_ELT: { 3743 if (tryInsertVectorElt(N)) 3744 return; 3745 break; 3746 } 3747 case ISD::SRL: 3748 if (tryV6T2BitfieldExtractOp(N, false)) 3749 return; 3750 break; 3751 case ISD::SIGN_EXTEND_INREG: 3752 case ISD::SRA: 3753 if (tryV6T2BitfieldExtractOp(N, true)) 3754 return; 3755 break; 3756 case ISD::FP_TO_UINT: 3757 case ISD::FP_TO_SINT: 3758 if (tryFP_TO_INT(N, dl)) 3759 return; 3760 break; 3761 case ISD::FMUL: 3762 if (tryFMULFixed(N, dl)) 3763 return; 3764 break; 3765 case ISD::MUL: 3766 if (Subtarget->isThumb1Only()) 3767 break; 3768 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3769 unsigned RHSV = C->getZExtValue(); 3770 if (!RHSV) break; 3771 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3772 unsigned ShImm = Log2_32(RHSV-1); 3773 if (ShImm >= 32) 3774 break; 3775 SDValue V = N->getOperand(0); 3776 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3777 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3778 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3779 if (Subtarget->isThumb()) { 3780 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3781 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3782 return; 3783 } else { 3784 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3785 Reg0 }; 3786 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3787 return; 3788 } 3789 } 3790 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3791 unsigned ShImm = Log2_32(RHSV+1); 3792 if (ShImm >= 32) 3793 break; 3794 SDValue V = N->getOperand(0); 3795 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3796 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3797 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3798 if (Subtarget->isThumb()) { 3799 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3800 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3801 return; 3802 } else { 3803 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3804 Reg0 }; 3805 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3806 return; 3807 } 3808 } 3809 } 3810 break; 3811 case ISD::AND: { 3812 // Check for unsigned bitfield extract 3813 if (tryV6T2BitfieldExtractOp(N, false)) 3814 return; 3815 3816 // If an immediate is used in an AND node, it is possible that the immediate 3817 // can be more optimally materialized when negated. If this is the case we 3818 // can negate the immediate and use a BIC instead. 3819 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3820 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3821 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3822 3823 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3824 // immediate can be negated and fit in the immediate operand of 3825 // a t2BIC, don't do any manual transform here as this can be 3826 // handled by the generic ISel machinery. 3827 bool PreferImmediateEncoding = 3828 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3829 if (!PreferImmediateEncoding && 3830 ConstantMaterializationCost(Imm, Subtarget) > 3831 ConstantMaterializationCost(~Imm, Subtarget)) { 3832 // The current immediate costs more to materialize than a negated 3833 // immediate, so negate the immediate and use a BIC. 3834 SDValue NewImm = 3835 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3836 // If the new constant didn't exist before, reposition it in the topological 3837 // ordering so it is just before N. Otherwise, don't touch its location. 3838 if (NewImm->getNodeId() == -1) 3839 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3840 3841 if (!Subtarget->hasThumb2()) { 3842 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3843 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3844 CurDAG->getRegister(0, MVT::i32)}; 3845 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3846 return; 3847 } else { 3848 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3849 CurDAG->getRegister(0, MVT::i32), 3850 CurDAG->getRegister(0, MVT::i32)}; 3851 ReplaceNode(N, 3852 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3853 return; 3854 } 3855 } 3856 } 3857 3858 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3859 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3860 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3861 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3862 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3863 EVT VT = N->getValueType(0); 3864 if (VT != MVT::i32) 3865 break; 3866 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3867 ? 
ARM::t2MOVTi16 3868 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3869 if (!Opc) 3870 break; 3871 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3872 N1C = dyn_cast<ConstantSDNode>(N1); 3873 if (!N1C) 3874 break; 3875 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3876 SDValue N2 = N0.getOperand(1); 3877 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3878 if (!N2C) 3879 break; 3880 unsigned N1CVal = N1C->getZExtValue(); 3881 unsigned N2CVal = N2C->getZExtValue(); 3882 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3883 (N1CVal & 0xffffU) == 0xffffU && 3884 (N2CVal & 0xffffU) == 0x0U) { 3885 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3886 dl, MVT::i32); 3887 SDValue Ops[] = { N0.getOperand(0), Imm16, 3888 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3889 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3890 return; 3891 } 3892 } 3893 3894 break; 3895 } 3896 case ARMISD::UMAAL: { 3897 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3898 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3899 N->getOperand(2), N->getOperand(3), 3900 getAL(CurDAG, dl), 3901 CurDAG->getRegister(0, MVT::i32) }; 3902 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3903 return; 3904 } 3905 case ARMISD::UMLAL:{ 3906 if (Subtarget->isThumb()) { 3907 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3908 N->getOperand(3), getAL(CurDAG, dl), 3909 CurDAG->getRegister(0, MVT::i32)}; 3910 ReplaceNode( 3911 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3912 return; 3913 }else{ 3914 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3915 N->getOperand(3), getAL(CurDAG, dl), 3916 CurDAG->getRegister(0, MVT::i32), 3917 CurDAG->getRegister(0, MVT::i32) }; 3918 ReplaceNode(N, CurDAG->getMachineNode( 3919 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3920 MVT::i32, MVT::i32, Ops)); 3921 return; 3922 } 3923 } 3924 case ARMISD::SMLAL:{ 3925 if (Subtarget->isThumb()) { 3926 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3927 N->getOperand(3), getAL(CurDAG, dl), 3928 CurDAG->getRegister(0, MVT::i32)}; 3929 ReplaceNode( 3930 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3931 return; 3932 }else{ 3933 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3934 N->getOperand(3), getAL(CurDAG, dl), 3935 CurDAG->getRegister(0, MVT::i32), 3936 CurDAG->getRegister(0, MVT::i32) }; 3937 ReplaceNode(N, CurDAG->getMachineNode( 3938 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3939 MVT::i32, MVT::i32, Ops)); 3940 return; 3941 } 3942 } 3943 case ARMISD::SUBE: { 3944 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3945 break; 3946 // Look for a pattern to match SMMLS 3947 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3948 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3949 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3950 !SDValue(N, 1).use_empty()) 3951 break; 3952 3953 if (Subtarget->isThumb()) 3954 assert(Subtarget->hasThumb2() && 3955 "This pattern should not be generated for Thumb"); 3956 3957 SDValue SmulLoHi = N->getOperand(1); 3958 SDValue Subc = N->getOperand(2); 3959 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3960 3961 if (!Zero || Zero->getZExtValue() != 0 || 3962 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3963 N->getOperand(1) != SmulLoHi.getValue(1) || 3964 N->getOperand(2) != Subc.getValue(1)) 3965 break; 3966 3967 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3968 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3969 N->getOperand(0), getAL(CurDAG, dl), 3970 CurDAG->getRegister(0, MVT::i32) }; 3971 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3972 return; 3973 } 3974 case ISD::LOAD: { 3975 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3976 return; 3977 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3978 if (tryT2IndexedLoad(N)) 3979 return; 3980 } else if (Subtarget->isThumb()) { 3981 if (tryT1IndexedLoad(N)) 3982 return; 3983 } else if (tryARMIndexedLoad(N)) 3984 return; 3985 // Other cases are autogenerated. 3986 break; 3987 } 3988 case ISD::MLOAD: 3989 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3990 return; 3991 // Other cases are autogenerated. 3992 break; 3993 case ARMISD::WLSSETUP: { 3994 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 3995 N->getOperand(0)); 3996 ReplaceUses(N, New); 3997 CurDAG->RemoveDeadNode(N); 3998 return; 3999 } 4000 case ARMISD::WLS: { 4001 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4002 N->getOperand(1), N->getOperand(2), 4003 N->getOperand(0)); 4004 ReplaceUses(N, New); 4005 CurDAG->RemoveDeadNode(N); 4006 return; 4007 } 4008 case ARMISD::LE: { 4009 SDValue Ops[] = { N->getOperand(1), 4010 N->getOperand(2), 4011 N->getOperand(0) }; 4012 unsigned Opc = ARM::t2LoopEnd; 4013 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4014 ReplaceUses(N, New); 4015 CurDAG->RemoveDeadNode(N); 4016 return; 4017 } 4018 case ARMISD::LDRD: { 4019 if (Subtarget->isThumb2()) 4020 break; // TableGen handles isel in this case. 
4021 SDValue Base, RegOffset, ImmOffset; 4022 const SDValue &Chain = N->getOperand(0); 4023 const SDValue &Addr = N->getOperand(1); 4024 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4025 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4026 // The register-offset variant of LDRD mandates that the register 4027 // allocated to RegOffset is not reused in any of the remaining operands. 4028 // This restriction is currently not enforced. Therefore emitting this 4029 // variant is explicitly avoided. 4030 Base = Addr; 4031 RegOffset = CurDAG->getRegister(0, MVT::i32); 4032 } 4033 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4034 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4035 {MVT::Untyped, MVT::Other}, Ops); 4036 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4037 SDValue(New, 0)); 4038 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4039 SDValue(New, 0)); 4040 transferMemOperands(N, New); 4041 ReplaceUses(SDValue(N, 0), Lo); 4042 ReplaceUses(SDValue(N, 1), Hi); 4043 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4044 CurDAG->RemoveDeadNode(N); 4045 return; 4046 } 4047 case ARMISD::STRD: { 4048 if (Subtarget->isThumb2()) 4049 break; // TableGen handles isel in this case. 4050 SDValue Base, RegOffset, ImmOffset; 4051 const SDValue &Chain = N->getOperand(0); 4052 const SDValue &Addr = N->getOperand(3); 4053 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4054 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4055 // The register-offset variant of STRD mandates that the register 4056 // allocated to RegOffset is not reused in any of the remaining operands. 4057 // This restriction is currently not enforced. Therefore emitting this 4058 // variant is explicitly avoided. 
4059 Base = Addr; 4060 RegOffset = CurDAG->getRegister(0, MVT::i32); 4061 } 4062 SDNode *RegPair = 4063 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4064 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4065 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4066 transferMemOperands(N, New); 4067 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4068 CurDAG->RemoveDeadNode(N); 4069 return; 4070 } 4071 case ARMISD::LOOP_DEC: { 4072 SDValue Ops[] = { N->getOperand(1), 4073 N->getOperand(2), 4074 N->getOperand(0) }; 4075 SDNode *Dec = 4076 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4077 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4078 ReplaceUses(N, Dec); 4079 CurDAG->RemoveDeadNode(N); 4080 return; 4081 } 4082 case ARMISD::BRCOND: { 4083 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4084 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4085 // Pattern complexity = 6 cost = 1 size = 0 4086 4087 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4088 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4089 // Pattern complexity = 6 cost = 1 size = 0 4090 4091 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4092 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4093 // Pattern complexity = 6 cost = 1 size = 0 4094 4095 unsigned Opc = Subtarget->isThumb() ? 4096 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4097 SDValue Chain = N->getOperand(0); 4098 SDValue N1 = N->getOperand(1); 4099 SDValue N2 = N->getOperand(2); 4100 SDValue N3 = N->getOperand(3); 4101 SDValue InFlag = N->getOperand(4); 4102 assert(N1.getOpcode() == ISD::BasicBlock); 4103 assert(N2.getOpcode() == ISD::Constant); 4104 assert(N3.getOpcode() == ISD::Register); 4105 4106 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 4107 4108 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4109 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4110 SDValue Int = InFlag.getOperand(0); 4111 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 4112 4113 // Handle low-overhead loops. 4114 if (ID == Intrinsic::loop_decrement_reg) { 4115 SDValue Elements = Int.getOperand(2); 4116 SDValue Size = CurDAG->getTargetConstant( 4117 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 4118 MVT::i32); 4119 4120 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4121 SDNode *LoopDec = 4122 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4123 CurDAG->getVTList(MVT::i32, MVT::Other), 4124 Args); 4125 ReplaceUses(Int.getNode(), LoopDec); 4126 4127 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4128 SDNode *LoopEnd = 4129 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4130 4131 ReplaceUses(N, LoopEnd); 4132 CurDAG->RemoveDeadNode(N); 4133 CurDAG->RemoveDeadNode(InFlag.getNode()); 4134 CurDAG->RemoveDeadNode(Int.getNode()); 4135 return; 4136 } 4137 } 4138 4139 bool SwitchEQNEToPLMI; 4140 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4141 InFlag = N->getOperand(4); 4142 4143 if (SwitchEQNEToPLMI) { 4144 switch ((ARMCC::CondCodes)CC) { 4145 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4146 case ARMCC::NE: 4147 CC = (unsigned)ARMCC::MI; 4148 break; 4149 case ARMCC::EQ: 4150 CC = (unsigned)ARMCC::PL; 4151 break; 4152 } 4153 } 4154 } 4155 4156 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4157 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 4158 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 4159 MVT::Glue, Ops); 4160 Chain = SDValue(ResNode, 0); 4161 if (N->getNumValues() == 2) { 4162 InFlag = SDValue(ResNode, 1); 4163 ReplaceUses(SDValue(N, 1), InFlag); 4164 } 4165 ReplaceUses(SDValue(N, 0), 4166 SDValue(Chain.getNode(), Chain.getResNo())); 4167 CurDAG->RemoveDeadNode(N); 4168 return; 4169 } 4170 4171 case ARMISD::CMPZ: { 4172 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4173 // This allows us to avoid materializing the expensive negative constant. 4174 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 4175 // for its glue output. 4176 SDValue X = N->getOperand(0); 4177 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4178 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4179 int64_t Addend = -C->getSExtValue(); 4180 4181 SDNode *Add = nullptr; 4182 // ADDS can be better than CMN if the immediate fits in a 4183 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4184 // Outside that range we can just use a CMN which is 32-bit but has a 4185 // 12-bit immediate range. 4186 if (Addend < 1<<8) { 4187 if (Subtarget->isThumb2()) { 4188 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4189 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4190 CurDAG->getRegister(0, MVT::i32) }; 4191 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4192 } else { 4193 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 4194 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4195 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4196 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4197 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4198 } 4199 } 4200 if (Add) { 4201 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4202 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4203 } 4204 } 4205 // Other cases are autogenerated. 4206 break; 4207 } 4208 4209 case ARMISD::CMOV: { 4210 SDValue InFlag = N->getOperand(4); 4211 4212 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4213 bool SwitchEQNEToPLMI; 4214 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4215 4216 if (SwitchEQNEToPLMI) { 4217 SDValue ARMcc = N->getOperand(2); 4218 ARMCC::CondCodes CC = 4219 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 4220 4221 switch (CC) { 4222 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4223 case ARMCC::NE: 4224 CC = ARMCC::MI; 4225 break; 4226 case ARMCC::EQ: 4227 CC = ARMCC::PL; 4228 break; 4229 } 4230 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4231 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4232 N->getOperand(3), N->getOperand(4)}; 4233 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4234 } 4235 4236 } 4237 // Other cases are autogenerated. 4238 break; 4239 } 4240 4241 case ARMISD::VZIP: { 4242 unsigned Opc = 0; 4243 EVT VT = N->getValueType(0); 4244 switch (VT.getSimpleVT().SimpleTy) { 4245 default: return; 4246 case MVT::v8i8: Opc = ARM::VZIPd8; break; 4247 case MVT::v4f16: 4248 case MVT::v4i16: Opc = ARM::VZIPd16; break; 4249 case MVT::v2f32: 4250 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
4251 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4252 case MVT::v16i8: Opc = ARM::VZIPq8; break; 4253 case MVT::v8f16: 4254 case MVT::v8i16: Opc = ARM::VZIPq16; break; 4255 case MVT::v4f32: 4256 case MVT::v4i32: Opc = ARM::VZIPq32; break; 4257 } 4258 SDValue Pred = getAL(CurDAG, dl); 4259 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4260 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4261 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4262 return; 4263 } 4264 case ARMISD::VUZP: { 4265 unsigned Opc = 0; 4266 EVT VT = N->getValueType(0); 4267 switch (VT.getSimpleVT().SimpleTy) { 4268 default: return; 4269 case MVT::v8i8: Opc = ARM::VUZPd8; break; 4270 case MVT::v4f16: 4271 case MVT::v4i16: Opc = ARM::VUZPd16; break; 4272 case MVT::v2f32: 4273 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4274 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4275 case MVT::v16i8: Opc = ARM::VUZPq8; break; 4276 case MVT::v8f16: 4277 case MVT::v8i16: Opc = ARM::VUZPq16; break; 4278 case MVT::v4f32: 4279 case MVT::v4i32: Opc = ARM::VUZPq32; break; 4280 } 4281 SDValue Pred = getAL(CurDAG, dl); 4282 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4283 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4284 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4285 return; 4286 } 4287 case ARMISD::VTRN: { 4288 unsigned Opc = 0; 4289 EVT VT = N->getValueType(0); 4290 switch (VT.getSimpleVT().SimpleTy) { 4291 default: return; 4292 case MVT::v8i8: Opc = ARM::VTRNd8; break; 4293 case MVT::v4f16: 4294 case MVT::v4i16: Opc = ARM::VTRNd16; break; 4295 case MVT::v2f32: 4296 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4297 case MVT::v16i8: Opc = ARM::VTRNq8; break; 4298 case MVT::v8f16: 4299 case MVT::v8i16: Opc = ARM::VTRNq16; break; 4300 case MVT::v4f32: 4301 case MVT::v4i32: Opc = ARM::VTRNq32; break; 4302 } 4303 SDValue Pred = getAL(CurDAG, dl); 4304 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 4305 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4306 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4307 return; 4308 } 4309 case ARMISD::BUILD_VECTOR: { 4310 EVT VecVT = N->getValueType(0); 4311 EVT EltVT = VecVT.getVectorElementType(); 4312 unsigned NumElts = VecVT.getVectorNumElements(); 4313 if (EltVT == MVT::f64) { 4314 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4315 ReplaceNode( 4316 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4317 return; 4318 } 4319 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4320 if (NumElts == 2) { 4321 ReplaceNode( 4322 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4323 return; 4324 } 4325 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4326 ReplaceNode(N, 4327 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4328 N->getOperand(2), N->getOperand(3))); 4329 return; 4330 } 4331 4332 case ARMISD::VLD1DUP: { 4333 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4334 ARM::VLD1DUPd32 }; 4335 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4336 ARM::VLD1DUPq32 }; 4337 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4338 return; 4339 } 4340 4341 case ARMISD::VLD2DUP: { 4342 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4343 ARM::VLD2DUPd32 }; 4344 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4345 return; 4346 } 4347 4348 case ARMISD::VLD3DUP: { 4349 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4350 ARM::VLD3DUPd16Pseudo, 4351 ARM::VLD3DUPd32Pseudo }; 4352 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4353 return; 4354 } 4355 4356 case ARMISD::VLD4DUP: { 4357 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4358 ARM::VLD4DUPd16Pseudo, 4359 ARM::VLD4DUPd32Pseudo }; 4360 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4361 return; 
4362 } 4363 4364 case ARMISD::VLD1DUP_UPD: { 4365 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4366 ARM::VLD1DUPd16wb_fixed, 4367 ARM::VLD1DUPd32wb_fixed }; 4368 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4369 ARM::VLD1DUPq16wb_fixed, 4370 ARM::VLD1DUPq32wb_fixed }; 4371 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4372 return; 4373 } 4374 4375 case ARMISD::VLD2DUP_UPD: { 4376 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4377 ARM::VLD2DUPd16wb_fixed, 4378 ARM::VLD2DUPd32wb_fixed, 4379 ARM::VLD1q64wb_fixed }; 4380 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4381 ARM::VLD2DUPq16EvenPseudo, 4382 ARM::VLD2DUPq32EvenPseudo }; 4383 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4384 ARM::VLD2DUPq16OddPseudoWB_fixed, 4385 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4386 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4387 return; 4388 } 4389 4390 case ARMISD::VLD3DUP_UPD: { 4391 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4392 ARM::VLD3DUPd16Pseudo_UPD, 4393 ARM::VLD3DUPd32Pseudo_UPD, 4394 ARM::VLD1d64TPseudoWB_fixed }; 4395 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4396 ARM::VLD3DUPq16EvenPseudo, 4397 ARM::VLD3DUPq32EvenPseudo }; 4398 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4399 ARM::VLD3DUPq16OddPseudo_UPD, 4400 ARM::VLD3DUPq32OddPseudo_UPD }; 4401 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4402 return; 4403 } 4404 4405 case ARMISD::VLD4DUP_UPD: { 4406 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4407 ARM::VLD4DUPd16Pseudo_UPD, 4408 ARM::VLD4DUPd32Pseudo_UPD, 4409 ARM::VLD1d64QPseudoWB_fixed }; 4410 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4411 ARM::VLD4DUPq16EvenPseudo, 4412 ARM::VLD4DUPq32EvenPseudo }; 4413 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 
4414 ARM::VLD4DUPq16OddPseudo_UPD, 4415 ARM::VLD4DUPq32OddPseudo_UPD }; 4416 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4417 return; 4418 } 4419 4420 case ARMISD::VLD1_UPD: { 4421 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4422 ARM::VLD1d16wb_fixed, 4423 ARM::VLD1d32wb_fixed, 4424 ARM::VLD1d64wb_fixed }; 4425 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4426 ARM::VLD1q16wb_fixed, 4427 ARM::VLD1q32wb_fixed, 4428 ARM::VLD1q64wb_fixed }; 4429 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4430 return; 4431 } 4432 4433 case ARMISD::VLD2_UPD: { 4434 if (Subtarget->hasNEON()) { 4435 static const uint16_t DOpcodes[] = { 4436 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4437 ARM::VLD1q64wb_fixed}; 4438 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4439 ARM::VLD2q16PseudoWB_fixed, 4440 ARM::VLD2q32PseudoWB_fixed}; 4441 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4442 } else { 4443 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4444 ARM::MVE_VLD21_8_wb}; 4445 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4446 ARM::MVE_VLD21_16_wb}; 4447 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4448 ARM::MVE_VLD21_32_wb}; 4449 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4450 SelectMVE_VLD(N, 2, Opcodes, true); 4451 } 4452 return; 4453 } 4454 4455 case ARMISD::VLD3_UPD: { 4456 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4457 ARM::VLD3d16Pseudo_UPD, 4458 ARM::VLD3d32Pseudo_UPD, 4459 ARM::VLD1d64TPseudoWB_fixed}; 4460 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4461 ARM::VLD3q16Pseudo_UPD, 4462 ARM::VLD3q32Pseudo_UPD }; 4463 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4464 ARM::VLD3q16oddPseudo_UPD, 4465 ARM::VLD3q32oddPseudo_UPD }; 4466 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4467 return; 4468 } 4469 4470 case ARMISD::VLD4_UPD: { 4471 if 
(Subtarget->hasNEON()) { 4472 static const uint16_t DOpcodes[] = { 4473 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4474 ARM::VLD1d64QPseudoWB_fixed}; 4475 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4476 ARM::VLD4q16Pseudo_UPD, 4477 ARM::VLD4q32Pseudo_UPD}; 4478 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4479 ARM::VLD4q16oddPseudo_UPD, 4480 ARM::VLD4q32oddPseudo_UPD}; 4481 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4482 } else { 4483 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4484 ARM::MVE_VLD42_8, 4485 ARM::MVE_VLD43_8_wb}; 4486 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4487 ARM::MVE_VLD42_16, 4488 ARM::MVE_VLD43_16_wb}; 4489 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4490 ARM::MVE_VLD42_32, 4491 ARM::MVE_VLD43_32_wb}; 4492 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4493 SelectMVE_VLD(N, 4, Opcodes, true); 4494 } 4495 return; 4496 } 4497 4498 case ARMISD::VLD1x2_UPD: { 4499 if (Subtarget->hasNEON()) { 4500 static const uint16_t DOpcodes[] = { 4501 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4502 ARM::VLD1q64wb_fixed}; 4503 static const uint16_t QOpcodes[] = { 4504 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4505 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4506 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4507 return; 4508 } 4509 break; 4510 } 4511 4512 case ARMISD::VLD1x3_UPD: { 4513 if (Subtarget->hasNEON()) { 4514 static const uint16_t DOpcodes[] = { 4515 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4516 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4517 static const uint16_t QOpcodes0[] = { 4518 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4519 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4520 static const uint16_t QOpcodes1[] = { 4521 
ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4522 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4523 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4524 return; 4525 } 4526 break; 4527 } 4528 4529 case ARMISD::VLD1x4_UPD: { 4530 if (Subtarget->hasNEON()) { 4531 static const uint16_t DOpcodes[] = { 4532 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4533 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4534 static const uint16_t QOpcodes0[] = { 4535 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4536 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4537 static const uint16_t QOpcodes1[] = { 4538 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4539 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4540 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4541 return; 4542 } 4543 break; 4544 } 4545 4546 case ARMISD::VLD2LN_UPD: { 4547 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4548 ARM::VLD2LNd16Pseudo_UPD, 4549 ARM::VLD2LNd32Pseudo_UPD }; 4550 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4551 ARM::VLD2LNq32Pseudo_UPD }; 4552 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4553 return; 4554 } 4555 4556 case ARMISD::VLD3LN_UPD: { 4557 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4558 ARM::VLD3LNd16Pseudo_UPD, 4559 ARM::VLD3LNd32Pseudo_UPD }; 4560 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4561 ARM::VLD3LNq32Pseudo_UPD }; 4562 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4563 return; 4564 } 4565 4566 case ARMISD::VLD4LN_UPD: { 4567 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4568 ARM::VLD4LNd16Pseudo_UPD, 4569 ARM::VLD4LNd32Pseudo_UPD }; 4570 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4571 ARM::VLD4LNq32Pseudo_UPD }; 4572 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4573 return; 4574 } 4575 4576 case ARMISD::VST1_UPD: { 4577 static const uint16_t 
DOpcodes[] = { ARM::VST1d8wb_fixed, 4578 ARM::VST1d16wb_fixed, 4579 ARM::VST1d32wb_fixed, 4580 ARM::VST1d64wb_fixed }; 4581 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4582 ARM::VST1q16wb_fixed, 4583 ARM::VST1q32wb_fixed, 4584 ARM::VST1q64wb_fixed }; 4585 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4586 return; 4587 } 4588 4589 case ARMISD::VST2_UPD: { 4590 if (Subtarget->hasNEON()) { 4591 static const uint16_t DOpcodes[] = { 4592 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4593 ARM::VST1q64wb_fixed}; 4594 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4595 ARM::VST2q16PseudoWB_fixed, 4596 ARM::VST2q32PseudoWB_fixed}; 4597 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4598 return; 4599 } 4600 break; 4601 } 4602 4603 case ARMISD::VST3_UPD: { 4604 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4605 ARM::VST3d16Pseudo_UPD, 4606 ARM::VST3d32Pseudo_UPD, 4607 ARM::VST1d64TPseudoWB_fixed}; 4608 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4609 ARM::VST3q16Pseudo_UPD, 4610 ARM::VST3q32Pseudo_UPD }; 4611 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4612 ARM::VST3q16oddPseudo_UPD, 4613 ARM::VST3q32oddPseudo_UPD }; 4614 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4615 return; 4616 } 4617 4618 case ARMISD::VST4_UPD: { 4619 if (Subtarget->hasNEON()) { 4620 static const uint16_t DOpcodes[] = { 4621 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4622 ARM::VST1d64QPseudoWB_fixed}; 4623 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4624 ARM::VST4q16Pseudo_UPD, 4625 ARM::VST4q32Pseudo_UPD}; 4626 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4627 ARM::VST4q16oddPseudo_UPD, 4628 ARM::VST4q32oddPseudo_UPD}; 4629 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4630 return; 4631 } 4632 break; 4633 } 4634 4635 case ARMISD::VST1x2_UPD: { 4636 if (Subtarget->hasNEON()) { 4637 static const uint16_t DOpcodes[] 
= { ARM::VST1q8wb_fixed, 4638 ARM::VST1q16wb_fixed, 4639 ARM::VST1q32wb_fixed, 4640 ARM::VST1q64wb_fixed}; 4641 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4642 ARM::VST1d16QPseudoWB_fixed, 4643 ARM::VST1d32QPseudoWB_fixed, 4644 ARM::VST1d64QPseudoWB_fixed }; 4645 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4646 return; 4647 } 4648 break; 4649 } 4650 4651 case ARMISD::VST1x3_UPD: { 4652 if (Subtarget->hasNEON()) { 4653 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4654 ARM::VST1d16TPseudoWB_fixed, 4655 ARM::VST1d32TPseudoWB_fixed, 4656 ARM::VST1d64TPseudoWB_fixed }; 4657 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4658 ARM::VST1q16LowTPseudo_UPD, 4659 ARM::VST1q32LowTPseudo_UPD, 4660 ARM::VST1q64LowTPseudo_UPD }; 4661 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4662 ARM::VST1q16HighTPseudo_UPD, 4663 ARM::VST1q32HighTPseudo_UPD, 4664 ARM::VST1q64HighTPseudo_UPD }; 4665 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4666 return; 4667 } 4668 break; 4669 } 4670 4671 case ARMISD::VST1x4_UPD: { 4672 if (Subtarget->hasNEON()) { 4673 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4674 ARM::VST1d16QPseudoWB_fixed, 4675 ARM::VST1d32QPseudoWB_fixed, 4676 ARM::VST1d64QPseudoWB_fixed }; 4677 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4678 ARM::VST1q16LowQPseudo_UPD, 4679 ARM::VST1q32LowQPseudo_UPD, 4680 ARM::VST1q64LowQPseudo_UPD }; 4681 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4682 ARM::VST1q16HighQPseudo_UPD, 4683 ARM::VST1q32HighQPseudo_UPD, 4684 ARM::VST1q64HighQPseudo_UPD }; 4685 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4686 return; 4687 } 4688 break; 4689 } 4690 case ARMISD::VST2LN_UPD: { 4691 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4692 ARM::VST2LNd16Pseudo_UPD, 4693 ARM::VST2LNd32Pseudo_UPD }; 4694 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4695 
ARM::VST2LNq32Pseudo_UPD }; 4696 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4697 return; 4698 } 4699 4700 case ARMISD::VST3LN_UPD: { 4701 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4702 ARM::VST3LNd16Pseudo_UPD, 4703 ARM::VST3LNd32Pseudo_UPD }; 4704 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4705 ARM::VST3LNq32Pseudo_UPD }; 4706 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4707 return; 4708 } 4709 4710 case ARMISD::VST4LN_UPD: { 4711 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4712 ARM::VST4LNd16Pseudo_UPD, 4713 ARM::VST4LNd32Pseudo_UPD }; 4714 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4715 ARM::VST4LNq32Pseudo_UPD }; 4716 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4717 return; 4718 } 4719 4720 case ISD::INTRINSIC_VOID: 4721 case ISD::INTRINSIC_W_CHAIN: { 4722 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4723 switch (IntNo) { 4724 default: 4725 break; 4726 4727 case Intrinsic::arm_mrrc: 4728 case Intrinsic::arm_mrrc2: { 4729 SDLoc dl(N); 4730 SDValue Chain = N->getOperand(0); 4731 unsigned Opc; 4732 4733 if (Subtarget->isThumb()) 4734 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4735 else 4736 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4737 4738 SmallVector<SDValue, 5> Ops; 4739 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4740 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4741 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4742 4743 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4744 // instruction will always be '1111' but it is possible in assembly language to specify 4745 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 
4746 if (Opc != ARM::MRRC2) { 4747 Ops.push_back(getAL(CurDAG, dl)); 4748 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4749 } 4750 4751 Ops.push_back(Chain); 4752 4753 // Writes to two registers. 4754 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4755 4756 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4757 return; 4758 } 4759 case Intrinsic::arm_ldaexd: 4760 case Intrinsic::arm_ldrexd: { 4761 SDLoc dl(N); 4762 SDValue Chain = N->getOperand(0); 4763 SDValue MemAddr = N->getOperand(2); 4764 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4765 4766 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4767 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4768 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4769 4770 // arm_ldrexd returns a i64 value in {i32, i32} 4771 std::vector<EVT> ResTys; 4772 if (isThumb) { 4773 ResTys.push_back(MVT::i32); 4774 ResTys.push_back(MVT::i32); 4775 } else 4776 ResTys.push_back(MVT::Untyped); 4777 ResTys.push_back(MVT::Other); 4778 4779 // Place arguments in the right order. 4780 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4781 CurDAG->getRegister(0, MVT::i32), Chain}; 4782 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4783 // Transfer memoperands. 4784 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4785 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4786 4787 // Remap uses. 4788 SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); 4789 if (!SDValue(N, 0).use_empty()) { 4790 SDValue Result; 4791 if (isThumb) 4792 Result = SDValue(Ld, 0); 4793 else { 4794 SDValue SubRegIdx = 4795 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4796 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4797 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4798 Result = SDValue(ResNode,0); 4799 } 4800 ReplaceUses(SDValue(N, 0), Result); 4801 } 4802 if (!SDValue(N, 1).use_empty()) { 4803 SDValue Result; 4804 if (isThumb) 4805 Result = SDValue(Ld, 1); 4806 else { 4807 SDValue SubRegIdx = 4808 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4809 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4810 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4811 Result = SDValue(ResNode,0); 4812 } 4813 ReplaceUses(SDValue(N, 1), Result); 4814 } 4815 ReplaceUses(SDValue(N, 2), OutChain); 4816 CurDAG->RemoveDeadNode(N); 4817 return; 4818 } 4819 case Intrinsic::arm_stlexd: 4820 case Intrinsic::arm_strexd: { 4821 SDLoc dl(N); 4822 SDValue Chain = N->getOperand(0); 4823 SDValue Val0 = N->getOperand(2); 4824 SDValue Val1 = N->getOperand(3); 4825 SDValue MemAddr = N->getOperand(4); 4826 4827 // Store exclusive double return a i32 value which is the return status 4828 // of the issued store. 4829 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4830 4831 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4832 // Place arguments in the right order. 4833 SmallVector<SDValue, 7> Ops; 4834 if (isThumb) { 4835 Ops.push_back(Val0); 4836 Ops.push_back(Val1); 4837 } else 4838 // arm_strexd uses GPRPair. 4839 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4840 Ops.push_back(MemAddr); 4841 Ops.push_back(getAL(CurDAG, dl)); 4842 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4843 Ops.push_back(Chain); 4844 4845 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4846 unsigned NewOpc = isThumb ? (IsRelease ? 
ARM::t2STLEXD : ARM::t2STREXD) 4847 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4848 4849 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4850 // Transfer memoperands. 4851 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4852 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4853 4854 ReplaceNode(N, St); 4855 return; 4856 } 4857 4858 case Intrinsic::arm_neon_vld1: { 4859 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4860 ARM::VLD1d32, ARM::VLD1d64 }; 4861 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4862 ARM::VLD1q32, ARM::VLD1q64}; 4863 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4864 return; 4865 } 4866 4867 case Intrinsic::arm_neon_vld1x2: { 4868 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4869 ARM::VLD1q32, ARM::VLD1q64 }; 4870 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4871 ARM::VLD1d16QPseudo, 4872 ARM::VLD1d32QPseudo, 4873 ARM::VLD1d64QPseudo }; 4874 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4875 return; 4876 } 4877 4878 case Intrinsic::arm_neon_vld1x3: { 4879 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4880 ARM::VLD1d16TPseudo, 4881 ARM::VLD1d32TPseudo, 4882 ARM::VLD1d64TPseudo }; 4883 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 4884 ARM::VLD1q16LowTPseudo_UPD, 4885 ARM::VLD1q32LowTPseudo_UPD, 4886 ARM::VLD1q64LowTPseudo_UPD }; 4887 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4888 ARM::VLD1q16HighTPseudo, 4889 ARM::VLD1q32HighTPseudo, 4890 ARM::VLD1q64HighTPseudo }; 4891 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4892 return; 4893 } 4894 4895 case Intrinsic::arm_neon_vld1x4: { 4896 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4897 ARM::VLD1d16QPseudo, 4898 ARM::VLD1d32QPseudo, 4899 ARM::VLD1d64QPseudo }; 4900 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4901 ARM::VLD1q16LowQPseudo_UPD, 4902 ARM::VLD1q32LowQPseudo_UPD, 4903 
ARM::VLD1q64LowQPseudo_UPD }; 4904 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4905 ARM::VLD1q16HighQPseudo, 4906 ARM::VLD1q32HighQPseudo, 4907 ARM::VLD1q64HighQPseudo }; 4908 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4909 return; 4910 } 4911 4912 case Intrinsic::arm_neon_vld2: { 4913 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4914 ARM::VLD2d32, ARM::VLD1q64 }; 4915 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4916 ARM::VLD2q32Pseudo }; 4917 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4918 return; 4919 } 4920 4921 case Intrinsic::arm_neon_vld3: { 4922 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4923 ARM::VLD3d16Pseudo, 4924 ARM::VLD3d32Pseudo, 4925 ARM::VLD1d64TPseudo }; 4926 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4927 ARM::VLD3q16Pseudo_UPD, 4928 ARM::VLD3q32Pseudo_UPD }; 4929 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4930 ARM::VLD3q16oddPseudo, 4931 ARM::VLD3q32oddPseudo }; 4932 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4933 return; 4934 } 4935 4936 case Intrinsic::arm_neon_vld4: { 4937 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4938 ARM::VLD4d16Pseudo, 4939 ARM::VLD4d32Pseudo, 4940 ARM::VLD1d64QPseudo }; 4941 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4942 ARM::VLD4q16Pseudo_UPD, 4943 ARM::VLD4q32Pseudo_UPD }; 4944 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4945 ARM::VLD4q16oddPseudo, 4946 ARM::VLD4q32oddPseudo }; 4947 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4948 return; 4949 } 4950 4951 case Intrinsic::arm_neon_vld2dup: { 4952 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4953 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4954 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4955 ARM::VLD2DUPq16EvenPseudo, 4956 ARM::VLD2DUPq32EvenPseudo }; 4957 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4958 
ARM::VLD2DUPq16OddPseudo, 4959 ARM::VLD2DUPq32OddPseudo }; 4960 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4961 DOpcodes, QOpcodes0, QOpcodes1); 4962 return; 4963 } 4964 4965 case Intrinsic::arm_neon_vld3dup: { 4966 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4967 ARM::VLD3DUPd16Pseudo, 4968 ARM::VLD3DUPd32Pseudo, 4969 ARM::VLD1d64TPseudo }; 4970 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4971 ARM::VLD3DUPq16EvenPseudo, 4972 ARM::VLD3DUPq32EvenPseudo }; 4973 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4974 ARM::VLD3DUPq16OddPseudo, 4975 ARM::VLD3DUPq32OddPseudo }; 4976 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4977 DOpcodes, QOpcodes0, QOpcodes1); 4978 return; 4979 } 4980 4981 case Intrinsic::arm_neon_vld4dup: { 4982 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4983 ARM::VLD4DUPd16Pseudo, 4984 ARM::VLD4DUPd32Pseudo, 4985 ARM::VLD1d64QPseudo }; 4986 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4987 ARM::VLD4DUPq16EvenPseudo, 4988 ARM::VLD4DUPq32EvenPseudo }; 4989 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4990 ARM::VLD4DUPq16OddPseudo, 4991 ARM::VLD4DUPq32OddPseudo }; 4992 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4993 DOpcodes, QOpcodes0, QOpcodes1); 4994 return; 4995 } 4996 4997 case Intrinsic::arm_neon_vld2lane: { 4998 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4999 ARM::VLD2LNd16Pseudo, 5000 ARM::VLD2LNd32Pseudo }; 5001 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 5002 ARM::VLD2LNq32Pseudo }; 5003 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 5004 return; 5005 } 5006 5007 case Intrinsic::arm_neon_vld3lane: { 5008 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 5009 ARM::VLD3LNd16Pseudo, 5010 ARM::VLD3LNd32Pseudo }; 5011 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 5012 ARM::VLD3LNq32Pseudo }; 5013 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 5014 return; 
5015 } 5016 5017 case Intrinsic::arm_neon_vld4lane: { 5018 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 5019 ARM::VLD4LNd16Pseudo, 5020 ARM::VLD4LNd32Pseudo }; 5021 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 5022 ARM::VLD4LNq32Pseudo }; 5023 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 5024 return; 5025 } 5026 5027 case Intrinsic::arm_neon_vst1: { 5028 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 5029 ARM::VST1d32, ARM::VST1d64 }; 5030 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5031 ARM::VST1q32, ARM::VST1q64 }; 5032 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 5033 return; 5034 } 5035 5036 case Intrinsic::arm_neon_vst1x2: { 5037 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5038 ARM::VST1q32, ARM::VST1q64 }; 5039 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 5040 ARM::VST1d16QPseudo, 5041 ARM::VST1d32QPseudo, 5042 ARM::VST1d64QPseudo }; 5043 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5044 return; 5045 } 5046 5047 case Intrinsic::arm_neon_vst1x3: { 5048 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 5049 ARM::VST1d16TPseudo, 5050 ARM::VST1d32TPseudo, 5051 ARM::VST1d64TPseudo }; 5052 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 5053 ARM::VST1q16LowTPseudo_UPD, 5054 ARM::VST1q32LowTPseudo_UPD, 5055 ARM::VST1q64LowTPseudo_UPD }; 5056 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 5057 ARM::VST1q16HighTPseudo, 5058 ARM::VST1q32HighTPseudo, 5059 ARM::VST1q64HighTPseudo }; 5060 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5061 return; 5062 } 5063 5064 case Intrinsic::arm_neon_vst1x4: { 5065 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 5066 ARM::VST1d16QPseudo, 5067 ARM::VST1d32QPseudo, 5068 ARM::VST1d64QPseudo }; 5069 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 5070 ARM::VST1q16LowQPseudo_UPD, 5071 ARM::VST1q32LowQPseudo_UPD, 5072 ARM::VST1q64LowQPseudo_UPD 
}; 5073 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 5074 ARM::VST1q16HighQPseudo, 5075 ARM::VST1q32HighQPseudo, 5076 ARM::VST1q64HighQPseudo }; 5077 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5078 return; 5079 } 5080 5081 case Intrinsic::arm_neon_vst2: { 5082 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 5083 ARM::VST2d32, ARM::VST1q64 }; 5084 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 5085 ARM::VST2q32Pseudo }; 5086 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5087 return; 5088 } 5089 5090 case Intrinsic::arm_neon_vst3: { 5091 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 5092 ARM::VST3d16Pseudo, 5093 ARM::VST3d32Pseudo, 5094 ARM::VST1d64TPseudo }; 5095 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 5096 ARM::VST3q16Pseudo_UPD, 5097 ARM::VST3q32Pseudo_UPD }; 5098 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 5099 ARM::VST3q16oddPseudo, 5100 ARM::VST3q32oddPseudo }; 5101 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5102 return; 5103 } 5104 5105 case Intrinsic::arm_neon_vst4: { 5106 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 5107 ARM::VST4d16Pseudo, 5108 ARM::VST4d32Pseudo, 5109 ARM::VST1d64QPseudo }; 5110 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 5111 ARM::VST4q16Pseudo_UPD, 5112 ARM::VST4q32Pseudo_UPD }; 5113 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 5114 ARM::VST4q16oddPseudo, 5115 ARM::VST4q32oddPseudo }; 5116 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5117 return; 5118 } 5119 5120 case Intrinsic::arm_neon_vst2lane: { 5121 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 5122 ARM::VST2LNd16Pseudo, 5123 ARM::VST2LNd32Pseudo }; 5124 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 5125 ARM::VST2LNq32Pseudo }; 5126 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 5127 return; 5128 } 5129 5130 case Intrinsic::arm_neon_vst3lane: { 5131 static 
const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 5132 ARM::VST3LNd16Pseudo, 5133 ARM::VST3LNd32Pseudo }; 5134 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 5135 ARM::VST3LNq32Pseudo }; 5136 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 5137 return; 5138 } 5139 5140 case Intrinsic::arm_neon_vst4lane: { 5141 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 5142 ARM::VST4LNd16Pseudo, 5143 ARM::VST4LNd32Pseudo }; 5144 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 5145 ARM::VST4LNq32Pseudo }; 5146 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 5147 return; 5148 } 5149 5150 case Intrinsic::arm_mve_vldr_gather_base_wb: 5151 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 5152 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 5153 ARM::MVE_VLDRDU64_qi_pre}; 5154 SelectMVE_WB(N, Opcodes, 5155 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 5156 return; 5157 } 5158 5159 case Intrinsic::arm_mve_vld2q: { 5160 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 5161 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 5162 ARM::MVE_VLD21_16}; 5163 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 5164 ARM::MVE_VLD21_32}; 5165 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5166 SelectMVE_VLD(N, 2, Opcodes, false); 5167 return; 5168 } 5169 5170 case Intrinsic::arm_mve_vld4q: { 5171 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 5172 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 5173 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 5174 ARM::MVE_VLD42_16, 5175 ARM::MVE_VLD43_16}; 5176 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 5177 ARM::MVE_VLD42_32, 5178 ARM::MVE_VLD43_32}; 5179 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5180 SelectMVE_VLD(N, 4, Opcodes, false); 5181 return; 5182 } 5183 } 5184 break; 5185 } 5186 5187 case 
ISD::INTRINSIC_WO_CHAIN: {
    // For INTRINSIC_WO_CHAIN, operand 0 holds the intrinsic ID.
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      // Src appears twice: BF16_VCVTB also reads the destination register,
      // so it is supplied as a tied input as well.
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    // MVE 64-bit scalar shifts. The two bools select between the
    // immediate-shift and register-shift (saturating/rounding) forms; see
    // SelectMVE_LongShift for the exact operand handling.
    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    // MVE add/subtract with carry, in plain and predicated forms.
    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      // Opcode tables indexed by SelectMVE_VMLLDAV; the ordering (element
      // size, accumulate, exchange, subtract variants) must match what that
      // helper expects — do not reorder.
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      // Rounding variants: only 32-bit element forms exist.
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    // MVE incrementing/decrementing dup, with and without wrap.
    // The bool after Opcodes tells SelectMVE_VxDUP whether a wrap limit
    // operand is present (true for VIWDUP/VDWDUP below).
    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    // CDE (Custom Datapath Extension) dual-register custom instructions
    // CX1D/CX2D/CX3D and their accumulating *A variants. NumExtraOps is the
    // count of extra source registers beyond the coprocessor/imm operands.
    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ?
                 ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  // None of the custom cases above matched (they all 'return'); hand the node
  // to the default selection routine.
  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
// and obtain the integer operands from them, adding these operands to the
// provided vector. If the string has fewer than two fields it is not a
// coprocessor-register string and Ops is left untouched.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      // trim("CPcp") strips any leading/trailing 'C', 'P', 'c' or 'p', so
      // e.g. "cp15" -> "15" and "c7" -> "7" before the base-10 parse.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
            "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  // Lookup is case-insensitive: the table is keyed on lower-case names.
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps MClass special registers string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  // Reject registers that the current subtarget does not implement.
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contains the fields to be accessed in the special register, set by
  // the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // NOTE(review): both dyn_casts are used without a null check — this relies
  // on the IR verifier guaranteeing a metadata-string operand here; confirm.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append predicate (AL), predicate register, and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // All remaining lookups are done on the lower-cased register name.
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    // mvfr2 only exists from FP-ARMv8 onwards.
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    // This local array intentionally shadows the outer std::vector 'Ops'.
    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ?
                                  ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // NOTE(review): dyn_cast results are used without null checks — assumes the
  // metadata operand shape is verifier-guaranteed; confirm against callers.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the value to write after the coprocessor and opc1 fields.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: two i32 halves are passed as operands 2 and 3.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append predicate (AL), predicate register, and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split "reg_flags" into the register name and its flags suffix (rsplit
  // takes the last '_' so names containing '_' keep their prefix intact).
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    // This local array intentionally shadows the outer std::vector 'Ops'.
    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Rewrite inline-asm nodes so that 64-bit "%r" operands use a single GPRPair
// register instead of two independent GPRs. Returns true if the asm node was
// replaced.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // Tracks, per register operand group, whether it was rewritten to a GPRPair.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // The leading operands (chain, asm string, ...) are copied through as-is.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Only rewrite two-register GPR groups (i.e. a 64-bit "%r" operand);
    // everything else is passed through unchanged.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Rebuild the flag word: same kind, but now a single GPRPair register.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  // NOTE(review): NodeId -1 appears to mark the node as not yet selected so
  // ISel will process the rebuilt asm node — confirm against SelectionDAGISel.
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}


bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  // Not reached: every case above returns (default is unreachable); kept to
  // satisfy compilers that require a return here.
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}