//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"

static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));

namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Src = N->getOperand(0);
      if (VT.isInteger())
        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
                              N->getOperand(0));
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0
      // vector load. Done after lowering and combining so that we have a
      // chance to optimize this to VMV_V_X_VL when the upper bits aren't
      // needed.
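      // For example, on RV32 a splat of a 64-bit scalar becomes roughly:
      //   sw   lo, 0(slot)
      //   sw   hi, 4(slot)
      //   vlse64.v vd, (slot), zero
      // using an 8-byte aligned stack slot and an x0 (stride 0) stride
      // operand, as built below.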
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create a temporary stack slot for each node being expanded.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
                                           Ops, MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs. Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
  }

  CurDAG->setRoot(Dummy.getValue());

  MadeChange |= doPeepholeMergeVVMFold();

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister. This is required to work around
  // an optimization deficiency in MachineCSE. This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                            RISCVMatInt::InstSeq &Seq) {
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
  for (const RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
    SDNode *Result = nullptr;
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return SrcReg;
}

static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);

  // Use a rematerializable pseudo instruction for short sequences if enabled.
  if (Seq.size() == 2 && UsePseudoMovImm)
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getTargetConstant(Imm, DL, VT)),
        0);

  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
  if (Seq.size() > 3) {
    unsigned ShiftAmt, AddOpc;
    RISCVMatInt::InstSeq SeqLo =
        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

      SDValue SLLI = SDValue(
          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
          0);
      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}

static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                           unsigned NF, RISCVII::VLMUL LMUL) {
  static const unsigned M1TupleRegClassIDs[] = {
      RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
      RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
      RISCV::VRN8M1RegClassID};
  static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
                                                RISCV::VRN3M2RegClassID,
                                                RISCV::VRN4M2RegClassID};

  assert(Regs.size() >= 2 && Regs.size() <= 8);

  unsigned RegClassID;
  unsigned SubReg0;
  switch (LMUL) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm1_0;
    RegClassID = M1TupleRegClassIDs[NF - 2];
    break;
  case RISCVII::VLMUL::LMUL_2:
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm2_0;
    RegClassID = M2TupleRegClassIDs[NF - 2];
    break;
  case RISCVII::VLMUL::LMUL_4:
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm4_0;
    RegClassID = RISCV::VRN2M4RegClassID;
    break;
  }

  SDLoc DL(Regs[0]);
  SmallVector<SDValue, 8> Ops;

  Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));

  for (unsigned I = 0; I < Regs.size(); ++I) {
    Ops.push_back(Regs[I]);
    Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
  }
  SDNode *N =
      CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do. All have passthru operands. For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVII::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
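  // If we copied a mask to V0 above, forward the glue so the copy stays
  // tied to the pseudo that consumes it and V0 is not clobbered in between.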
  if (Glue)
    Operands.push_back(Glue);
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
  Operands.push_back(Merge);
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
  Operands.push_back(MaskedOff);
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
  Operands.push_back(MaskedOff);
  CurOp += NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 4;
  if (IsStrided)
    NF--;
  if (IsMasked)
    NF--;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 5;
  if (IsMasked)
    --NF;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
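  // The index operand carries its own EEW and LMUL, independent of the data
  // operand; both feed into the pseudo lookup below.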
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
    const unsigned VLEN = Subtarget->getRealMinVLen();
    if (VLEN == Subtarget->getRealMaxVLen())
      if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
                                                 XLenVT, VLImm, VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(Node,
              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}

bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and
  // use a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR; AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR:  BinOpc = RISCV::ORI;  break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  SDNode *BinOp =
      CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
                             CurDAG->getTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}

bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  // Only supported with XTHeadBb at the moment.
  if (!Subtarget->hasVendorXTHeadBb())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
                             MVT VT) {
    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Msb, DL, VT),
                                  CurDAG->getTargetConstant(Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1), C2) with C1 <= C2
  //        -> (TH.EXT X, msb, lsb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount cannot be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (TH.EXT X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
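    // For smaller ExtSize, e.g. (sra (sext_inreg X, i16), 3) becomes
    // TH.EXT X, 15, 3, extracting and sign-extending bits 15:3 of X.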
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    const unsigned Lsb = RightShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
  bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC);
  int64_t Offset = C->getSExtValue();

  // Convert decrements to increments by a negative quantity.
  if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC)
    Offset = -Offset;

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
  int64_t Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {Ld->getBasePtr(),
                   CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
                   CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
                   Ld->getChain()};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
                                       Ld->getValueType(1), MVT::Other, Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});

  ReplaceNode(Node, New);

  return true;
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();

  switch (Opcode) {
  case ISD::Constant: {
    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
    return;
  }
  case ISD::ConstantFP: {
    const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
    auto [FPImm, NeedsFNeg] =
        static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
                                                                        VT);
    if (FPImm >= 0) {
      unsigned Opc;
      unsigned FNegOpc;
      switch (VT.SimpleTy) {
      default:
        llvm_unreachable("Unexpected size");
      case MVT::f16:
        Opc = RISCV::FLI_H;
        FNegOpc = RISCV::FSGNJN_H;
        break;
      case MVT::f32:
        Opc = RISCV::FLI_S;
        FNegOpc = RISCV::FSGNJN_S;
        break;
      case MVT::f64:
        Opc = RISCV::FLI_D;
        FNegOpc = RISCV::FSGNJN_D;
        break;
      }
      SDNode *Res = CurDAG->getMachineNode(
          Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
      if (NeedsFNeg)
        Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
                                     SDValue(Res, 0));

      ReplaceNode(Node, Res);
      return;
    }

    bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
    SDValue Imm;
    // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
    // create an integer immediate.
    if (APF.isPosZero() || NegZeroF64)
      Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
    else
      Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                      *Subtarget);

    bool HasZdinx = Subtarget->hasStdExtZdinx();
    bool Is64Bit = Subtarget->is64Bit();
    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      llvm_unreachable("Unexpected size");
    case MVT::bf16:
      assert(Subtarget->hasStdExtZfbfmin());
      Opc = RISCV::FMV_H_X;
      break;
    case MVT::f16:
      Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
      break;
    case MVT::f32:
      Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
      break;
    case MVT::f64:
      // For RV32, we can't move from a GPR, so we need to convert instead.
      // This should only happen for +0.0 and -0.0.
      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
      if (Is64Bit)
        Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
      else
        Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
      break;
    }

    SDNode *Res;
    if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
      Res = CurDAG->getMachineNode(
          Opc, DL, VT, Imm,
          CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
    else
      Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);

    // For f64 -0.0, we need to insert a fneg.d idiom.
    if (NegZeroF64) {
      Opc = RISCV::FSGNJN_D;
      if (HasZdinx)
        Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
      Res =
          CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
    }

    ReplaceNode(Node, Res);
    return;
  }
  case RISCVISD::SplitF64: {
    if (!Subtarget->hasStdExtZfa())
      break;
    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
           "Unexpected subtarget");

    // With Zfa, lower to fmv.x.w and fmvh.x.d.
    if (!SDValue(Node, 0).use_empty()) {
      SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
    }
    if (!SDValue(Node, 1).use_empty()) {
      SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
    }

    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::SHL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (TrailingZeros > 0 && LeadingZeros == 32) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }
    break;
  }
  case ISD::SRL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
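    // For example, on RV64 with C2 == 0xFFFF0000 (C3 == 16) and C == 8:
    // (srl (and X, 0xFFFF0000), 8) becomes (slli (srliw X, 16), 8), which
    // avoids materializing the mask constant in a register.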
    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // where C2 is a mask with C3 trailing ones.
    // This takes into account that C2 may have had lower bits unset by
    // SimplifyDemandedBits, and avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = llvm::countr_one(Mask);
    if (ShAmt >= TrailingOnes)
      break;
    // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
    if (TrailingOnes == 32) {
      SDNode *SRLI = CurDAG->getMachineNode(
          Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, SRLI);
      return;
    }

    // Only do the remaining transforms if the AND has one use.
    if (!N0.hasOneUse())
      break;

    // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
    if (HasBitTest && ShAmt + 1 == TrailingOnes) {
      SDNode *BEXTI = CurDAG->getMachineNode(
          Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, BEXTI);
      return;
    }

    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    if (trySignedBitfieldExtract(Node))
      return;

    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //     (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
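    // For example, on RV64 (sra (sext_inreg X, i8), 2) becomes
    // (srai (slli X, 56), 58).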
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::OR:
  case ISD::XOR:
    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    uint64_t C1 = N1C->getZExtValue();
    const bool isC1Mask = isMask_64(C1);
    const bool isC1ANDI = isInt<12>(C1);

    SDValue N0 = Node->getOperand(0);

    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
                                          SDValue X, unsigned Msb,
                                          unsigned Lsb) {
      if (!Subtarget->hasVendorXTHeadBb())
        return false;

      SDNode *TH_EXTU = CurDAG->getMachineNode(
          RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
          CurDAG->getTargetConstant(Lsb, DL, VT));
      ReplaceNode(Node, TH_EXTU);
      return true;
    };

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (LeftShift || N0.getOpcode() == ISD::SRL) {
      auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C)
        break;
      unsigned C2 = C->getZExtValue();
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      // Keep track of whether this is a c.andi. If we can't use c.andi, the
      // shift pair might offer more compression opportunities.
      // TODO: We could check for C extension here, but we don't have many lit
      // tests with the C extension enabled so not checking gets better
      // coverage.
      // TODO: What if ANDI is faster than the shift?
      bool IsCANDI = isInt<6>(N1C->getSExtValue());

      // Clear irrelevant bits in the mask.
      if (LeftShift)
        C1 &= maskTrailingZeros<uint64_t>(C2);
      else
        C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Some transforms should only be done if the shift has a single use or
      // the AND would become (srli (slli X, 32), 32).
      bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

      SDValue X = N0.getOperand(0);

      // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a
      // mask with c3 leading zeros.
      if (!LeftShift && isC1Mask) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 < Leading) {
          // If the number of leading zeros is C2+32 this can be SRLIW.
          if (C2 + 32 == Leading) {
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
          // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
          //
          // This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
          // legalized and goes through DAG combine.
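          // For example, with c2 == 32 and c1 == 0x7fffffff (c3 == 33):
          // (and (srl (sexti32 Y), 32), 0x7fffffff)
          //   -> (srliw (sraiw Y, 31), 1).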
          if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
              X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
            SDNode *SRAIW =
                CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
                                       CurDAG->getTargetConstant(31, DL, VT));
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
                CurDAG->getTargetConstant(Leading - 32, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // Try to use an unsigned bitfield extract (e.g., th.extu) if
          // available.
          // Transform (and (srl x, C2), C1)
          //        -> (<bfextract> x, msb, lsb)
          //
          // Make sure to keep this below the SRLIW cases, as we always want to
          // prefer the more common instruction.
          const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
          const unsigned Lsb = C2;
          if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
            return;

          // (srli (slli x, c3-c2), c3).
          // Skip if we could use (zext.w (sraiw X, C2)).
          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
                      X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                      cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
          // Also Skip if we can use bexti or th.tst.
          Skip |= HasBitTest && Leading == XLen - 1;
          if (OneUseOrZExtW && !Skip) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(Leading - C2, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a
      // mask shifted by c2 bits with c3 leading zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);

        if (C2 + Leading < XLen &&
            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
          // Use slli.uw when possible.
          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
            SDNode *SLLI_UW =
                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
                                       CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SLLI_UW);
            return;
          }

          // (srli (slli c2+c3), c3)
          if (OneUseOrZExtW && !IsCANDI) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(C2 + Leading, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
      // shifted mask with c2 leading zeros and c3 trailing zeros.
      if (!LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
            !IsCANDI) {
          unsigned SrliOpc = RISCV::SRLI;
          // If the input is zexti32 we should use SRLIW.
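          // (srliw only reads the low 32 bits of its input, so an explicit
          // zexti32 of X is redundant and can be folded away here.)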
          if (X.getOpcode() == ISD::AND &&
              isa<ConstantSDNode>(X.getOperand(1)) &&
              X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
            SrliOpc = RISCV::SRLIW;
            X = X.getOperand(0);
          }
          SDNode *SRLI = CurDAG->getMachineNode(
              SrliOpc, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If the leading zero count is C2+32, we can use SRLIW instead of
        // SRLI.
        if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
            OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }

      // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
      // shifted mask with no leading zeros and c3 trailing zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
        if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }
    }

    // If C1 masks off the upper bits only (but can't be formed as an
    // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
    // available.
    // Transform (and x, C1)
    //        -> (<bfextract> x, msb, lsb)
    if (isC1Mask && !isC1ANDI) {
      const unsigned Msb = llvm::bit_width(C1) - 1;
      if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
        return;
    }

    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full
    // product fits in XLen bits. We can shift X left by the number of leading
    // zeros in C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the
    // final product has XLen trailing zeros, putting it in the output of
    // MULHU. This can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();

    // Constant should be a mask.
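    // For example, with XLen == 64 and C2 == 0xffff (lzcnt == 48), the mul
    // becomes (mulhu (slli X, 48), C1 << 16); the high 64 bits of the 128-bit
    // product are exactly (X & 0xffff) * C1 when that product fits in XLen
    // bits.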
    if (!isMask_64(C2))
      break;

    // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
    // multiple users or the constant is a simm12. This prevents inserting a
    // shift while still having uses of the AND/ZEXT. Shifting a simm12 will
    // likely make it more costly to materialize. Otherwise, using a SLLI
    // might allow it to be compressed.
    bool IsANDIOrZExt =
        isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
    // With XTHeadBb, we can use TH.EXTU.
    IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;
    // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
    // the constant is a simm32.
    bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
    // With XTHeadBb, we can use TH.EXTU.
    IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - llvm::bit_width(C2);

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - llvm::bit_width(C1)))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
  case ISD::LOAD: {
    if (tryIndexedLoad(Node))
      return;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
    break;
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsgeu with 0 immediate, expand it to vmset.
      if (IsCmpUnsignedZero) {
        ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, SEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    break;
        CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
        CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
        CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
        CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
        CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
        CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
        CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
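      // Here VT is the i1 mask result type, so getLMUL(VT) reflects the mask
      // register layout rather than Src1VT's data LMUL.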
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
    break;
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
      if (IsCmpUnsignedZero) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(Node, Mask.getNode());
          return;
        }
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
                                           {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next
      // sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      // Mask needs to be copied to V0.
      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           RISCV::V0, Mask, SDValue());
      SDValue Glue = Chain.getValue(1);
      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior,
      // because the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
          0);
      // vmxor.mm vd, vd, v0 is used to update the active lanes.
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
1610 default: 1611 break; 1612 case Intrinsic::riscv_vlseg2: 1613 case Intrinsic::riscv_vlseg3: 1614 case Intrinsic::riscv_vlseg4: 1615 case Intrinsic::riscv_vlseg5: 1616 case Intrinsic::riscv_vlseg6: 1617 case Intrinsic::riscv_vlseg7: 1618 case Intrinsic::riscv_vlseg8: { 1619 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1620 return; 1621 } 1622 case Intrinsic::riscv_vlseg2_mask: 1623 case Intrinsic::riscv_vlseg3_mask: 1624 case Intrinsic::riscv_vlseg4_mask: 1625 case Intrinsic::riscv_vlseg5_mask: 1626 case Intrinsic::riscv_vlseg6_mask: 1627 case Intrinsic::riscv_vlseg7_mask: 1628 case Intrinsic::riscv_vlseg8_mask: { 1629 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1630 return; 1631 } 1632 case Intrinsic::riscv_vlsseg2: 1633 case Intrinsic::riscv_vlsseg3: 1634 case Intrinsic::riscv_vlsseg4: 1635 case Intrinsic::riscv_vlsseg5: 1636 case Intrinsic::riscv_vlsseg6: 1637 case Intrinsic::riscv_vlsseg7: 1638 case Intrinsic::riscv_vlsseg8: { 1639 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1640 return; 1641 } 1642 case Intrinsic::riscv_vlsseg2_mask: 1643 case Intrinsic::riscv_vlsseg3_mask: 1644 case Intrinsic::riscv_vlsseg4_mask: 1645 case Intrinsic::riscv_vlsseg5_mask: 1646 case Intrinsic::riscv_vlsseg6_mask: 1647 case Intrinsic::riscv_vlsseg7_mask: 1648 case Intrinsic::riscv_vlsseg8_mask: { 1649 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1650 return; 1651 } 1652 case Intrinsic::riscv_vloxseg2: 1653 case Intrinsic::riscv_vloxseg3: 1654 case Intrinsic::riscv_vloxseg4: 1655 case Intrinsic::riscv_vloxseg5: 1656 case Intrinsic::riscv_vloxseg6: 1657 case Intrinsic::riscv_vloxseg7: 1658 case Intrinsic::riscv_vloxseg8: 1659 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1660 return; 1661 case Intrinsic::riscv_vluxseg2: 1662 case Intrinsic::riscv_vluxseg3: 1663 case Intrinsic::riscv_vluxseg4: 1664 case Intrinsic::riscv_vluxseg5: 1665 case Intrinsic::riscv_vluxseg6: 1666 case Intrinsic::riscv_vluxseg7: 1667 case Intrinsic::riscv_vluxseg8: 1668 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1669 return; 1670 case Intrinsic::riscv_vloxseg2_mask: 1671 case Intrinsic::riscv_vloxseg3_mask: 1672 case Intrinsic::riscv_vloxseg4_mask: 1673 case Intrinsic::riscv_vloxseg5_mask: 1674 case Intrinsic::riscv_vloxseg6_mask: 1675 case Intrinsic::riscv_vloxseg7_mask: 1676 case Intrinsic::riscv_vloxseg8_mask: 1677 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1678 return; 1679 case Intrinsic::riscv_vluxseg2_mask: 1680 case Intrinsic::riscv_vluxseg3_mask: 1681 case Intrinsic::riscv_vluxseg4_mask: 1682 case Intrinsic::riscv_vluxseg5_mask: 1683 case Intrinsic::riscv_vluxseg6_mask: 1684 case Intrinsic::riscv_vluxseg7_mask: 1685 case Intrinsic::riscv_vluxseg8_mask: 1686 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1687 return; 1688 case Intrinsic::riscv_vlseg8ff: 1689 case Intrinsic::riscv_vlseg7ff: 1690 case Intrinsic::riscv_vlseg6ff: 1691 case Intrinsic::riscv_vlseg5ff: 1692 case Intrinsic::riscv_vlseg4ff: 1693 case Intrinsic::riscv_vlseg3ff: 1694 case Intrinsic::riscv_vlseg2ff: { 1695 selectVLSEGFF(Node, /*IsMasked*/ false); 1696 return; 1697 } 1698 case Intrinsic::riscv_vlseg8ff_mask: 1699 case Intrinsic::riscv_vlseg7ff_mask: 1700 case Intrinsic::riscv_vlseg6ff_mask: 1701 case Intrinsic::riscv_vlseg5ff_mask: 1702 case Intrinsic::riscv_vlseg4ff_mask: 1703 case Intrinsic::riscv_vlseg3ff_mask: 1704 case Intrinsic::riscv_vlseg2ff_mask: { 1705 selectVLSEGFF(Node, /*IsMasked*/ true); 1706 return; 1707 } 1708 case 
Intrinsic::riscv_vloxei:
1709   case Intrinsic::riscv_vloxei_mask:
1710   case Intrinsic::riscv_vluxei:
1711   case Intrinsic::riscv_vluxei_mask: {
1712     bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1713                     IntNo == Intrinsic::riscv_vluxei_mask;
1714     bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1715                      IntNo == Intrinsic::riscv_vloxei_mask;
1716
1717     MVT VT = Node->getSimpleValueType(0);
1718     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1719
1720     unsigned CurOp = 2;
1721     SmallVector<SDValue, 8> Operands;
1722     Operands.push_back(Node->getOperand(CurOp++));
1723
1724     MVT IndexVT;
1725     addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1726                                /*IsStridedOrIndexed*/ true, Operands,
1727                                /*IsLoad=*/true, &IndexVT);
1728
1729     assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1730            "Element count mismatch");
1731
1732     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1733     RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1734     unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1735     if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1736       report_fatal_error("The V extension does not support EEW=64 for index "
1737                          "values when XLEN=32");
1738     }
1739     const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1740         IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1741         static_cast<unsigned>(IndexLMUL));
1742     MachineSDNode *Load =
1743         CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1744
1745     if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1746       CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1747
1748     ReplaceNode(Node, Load);
1749     return;
1750   }
1751   case Intrinsic::riscv_vlm:
1752   case Intrinsic::riscv_vle:
1753   case Intrinsic::riscv_vle_mask:
1754   case Intrinsic::riscv_vlse:
1755   case Intrinsic::riscv_vlse_mask: {
1756     bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1757                     IntNo == Intrinsic::riscv_vlse_mask;
1758     bool IsStrided =
1759         IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1760
1761     MVT VT = Node->getSimpleValueType(0);
1762     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1763
1764     // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1765     // operand at the IR level. In pseudos, it has both a policy and a
1766     // passthru operand. The passthru operand is needed to track the
1767     // "tail undefined" state, and the policy is there just for
1768     // consistency - it will always be "don't care" for the
1769     // unmasked form.
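    // As a concrete illustration (assumed mapping, not exhaustive): an
    // unmasked riscv_vle of nxv2i32 has Log2SEW=5 and LMUL=1, so the
    // getVLEPseudo lookup below resolves to PseudoVLE32_V_M1, while the
    // strided riscv_vlse form would pick PseudoVLSE32_V_M1 instead.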
1770 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1771 unsigned CurOp = 2; 1772 SmallVector<SDValue, 8> Operands; 1773 if (HasPassthruOperand) 1774 Operands.push_back(Node->getOperand(CurOp++)); 1775 else { 1776 // We eagerly lower to implicit_def (instead of undef), as we 1777 // otherwise fail to select nodes such as: nxv1i1 = undef 1778 SDNode *Passthru = 1779 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 1780 Operands.push_back(SDValue(Passthru, 0)); 1781 } 1782 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1783 Operands, /*IsLoad=*/true); 1784 1785 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1786 const RISCV::VLEPseudo *P = 1787 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, 1788 static_cast<unsigned>(LMUL)); 1789 MachineSDNode *Load = 1790 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1791 1792 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1793 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1794 1795 ReplaceNode(Node, Load); 1796 return; 1797 } 1798 case Intrinsic::riscv_vleff: 1799 case Intrinsic::riscv_vleff_mask: { 1800 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1801 1802 MVT VT = Node->getSimpleValueType(0); 1803 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1804 1805 unsigned CurOp = 2; 1806 SmallVector<SDValue, 7> Operands; 1807 Operands.push_back(Node->getOperand(CurOp++)); 1808 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1809 /*IsStridedOrIndexed*/ false, Operands, 1810 /*IsLoad=*/true); 1811 1812 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1813 const RISCV::VLEPseudo *P = 1814 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, 1815 Log2SEW, static_cast<unsigned>(LMUL)); 1816 MachineSDNode *Load = CurDAG->getMachineNode( 1817 P->Pseudo, DL, Node->getVTList(), Operands); 1818 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1819 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1820 1821 ReplaceNode(Node, Load); 1822 return; 1823 } 1824 } 1825 break; 1826 } 1827 case ISD::INTRINSIC_VOID: { 1828 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1829 switch (IntNo) { 1830 case Intrinsic::riscv_vsseg2: 1831 case Intrinsic::riscv_vsseg3: 1832 case Intrinsic::riscv_vsseg4: 1833 case Intrinsic::riscv_vsseg5: 1834 case Intrinsic::riscv_vsseg6: 1835 case Intrinsic::riscv_vsseg7: 1836 case Intrinsic::riscv_vsseg8: { 1837 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1838 return; 1839 } 1840 case Intrinsic::riscv_vsseg2_mask: 1841 case Intrinsic::riscv_vsseg3_mask: 1842 case Intrinsic::riscv_vsseg4_mask: 1843 case Intrinsic::riscv_vsseg5_mask: 1844 case Intrinsic::riscv_vsseg6_mask: 1845 case Intrinsic::riscv_vsseg7_mask: 1846 case Intrinsic::riscv_vsseg8_mask: { 1847 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1848 return; 1849 } 1850 case Intrinsic::riscv_vssseg2: 1851 case Intrinsic::riscv_vssseg3: 1852 case Intrinsic::riscv_vssseg4: 1853 case Intrinsic::riscv_vssseg5: 1854 case Intrinsic::riscv_vssseg6: 1855 case Intrinsic::riscv_vssseg7: 1856 case Intrinsic::riscv_vssseg8: { 1857 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1858 return; 1859 } 1860 case Intrinsic::riscv_vssseg2_mask: 1861 case Intrinsic::riscv_vssseg3_mask: 1862 case Intrinsic::riscv_vssseg4_mask: 1863 case Intrinsic::riscv_vssseg5_mask: 1864 case Intrinsic::riscv_vssseg6_mask: 1865 case Intrinsic::riscv_vssseg7_mask: 1866 case Intrinsic::riscv_vssseg8_mask: { 1867 
selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1868 return; 1869 } 1870 case Intrinsic::riscv_vsoxseg2: 1871 case Intrinsic::riscv_vsoxseg3: 1872 case Intrinsic::riscv_vsoxseg4: 1873 case Intrinsic::riscv_vsoxseg5: 1874 case Intrinsic::riscv_vsoxseg6: 1875 case Intrinsic::riscv_vsoxseg7: 1876 case Intrinsic::riscv_vsoxseg8: 1877 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1878 return; 1879 case Intrinsic::riscv_vsuxseg2: 1880 case Intrinsic::riscv_vsuxseg3: 1881 case Intrinsic::riscv_vsuxseg4: 1882 case Intrinsic::riscv_vsuxseg5: 1883 case Intrinsic::riscv_vsuxseg6: 1884 case Intrinsic::riscv_vsuxseg7: 1885 case Intrinsic::riscv_vsuxseg8: 1886 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1887 return; 1888 case Intrinsic::riscv_vsoxseg2_mask: 1889 case Intrinsic::riscv_vsoxseg3_mask: 1890 case Intrinsic::riscv_vsoxseg4_mask: 1891 case Intrinsic::riscv_vsoxseg5_mask: 1892 case Intrinsic::riscv_vsoxseg6_mask: 1893 case Intrinsic::riscv_vsoxseg7_mask: 1894 case Intrinsic::riscv_vsoxseg8_mask: 1895 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1896 return; 1897 case Intrinsic::riscv_vsuxseg2_mask: 1898 case Intrinsic::riscv_vsuxseg3_mask: 1899 case Intrinsic::riscv_vsuxseg4_mask: 1900 case Intrinsic::riscv_vsuxseg5_mask: 1901 case Intrinsic::riscv_vsuxseg6_mask: 1902 case Intrinsic::riscv_vsuxseg7_mask: 1903 case Intrinsic::riscv_vsuxseg8_mask: 1904 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1905 return; 1906 case Intrinsic::riscv_vsoxei: 1907 case Intrinsic::riscv_vsoxei_mask: 1908 case Intrinsic::riscv_vsuxei: 1909 case Intrinsic::riscv_vsuxei_mask: { 1910 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1911 IntNo == Intrinsic::riscv_vsuxei_mask; 1912 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1913 IntNo == Intrinsic::riscv_vsoxei_mask; 1914 1915 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1916 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1917 1918 unsigned CurOp = 2; 1919 SmallVector<SDValue, 8> Operands; 1920 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
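      // addVectorLoadStoreOperands appends, after the store value: the base
      // pointer, the index vector, the mask (for the _mask variants), then
      // the VL, SEW and chain operands expected by the indexed-store pseudos.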
1921 1922 MVT IndexVT; 1923 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1924 /*IsStridedOrIndexed*/ true, Operands, 1925 /*IsLoad=*/false, &IndexVT); 1926 1927 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1928 "Element count mismatch"); 1929 1930 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1931 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1932 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1933 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1934 report_fatal_error("The V extension does not support EEW=64 for index " 1935 "values when XLEN=32"); 1936 } 1937 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1938 IsMasked, IsOrdered, IndexLog2EEW, 1939 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1940 MachineSDNode *Store = 1941 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1942 1943 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1944 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1945 1946 ReplaceNode(Node, Store); 1947 return; 1948 } 1949 case Intrinsic::riscv_vsm: 1950 case Intrinsic::riscv_vse: 1951 case Intrinsic::riscv_vse_mask: 1952 case Intrinsic::riscv_vsse: 1953 case Intrinsic::riscv_vsse_mask: { 1954 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1955 IntNo == Intrinsic::riscv_vsse_mask; 1956 bool IsStrided = 1957 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1958 1959 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1960 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1961 1962 unsigned CurOp = 2; 1963 SmallVector<SDValue, 8> Operands; 1964 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1965 1966 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1967 Operands); 1968 1969 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1970 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1971 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1972 MachineSDNode *Store = 1973 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1974 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1975 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1976 1977 ReplaceNode(Node, Store); 1978 return; 1979 } 1980 } 1981 break; 1982 } 1983 case ISD::BITCAST: { 1984 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1985 // Just drop bitcasts between vectors if both are fixed or both are 1986 // scalable. 1987 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1988 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1989 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1990 CurDAG->RemoveDeadNode(Node); 1991 return; 1992 } 1993 break; 1994 } 1995 case ISD::INSERT_SUBVECTOR: { 1996 SDValue V = Node->getOperand(0); 1997 SDValue SubV = Node->getOperand(1); 1998 SDLoc DL(SubV); 1999 auto Idx = Node->getConstantOperandVal(2); 2000 MVT SubVecVT = SubV.getSimpleValueType(); 2001 2002 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2003 MVT SubVecContainerVT = SubVecVT; 2004 // Establish the correct scalable-vector types for any fixed-length type. 
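    // (E.g. under the common 128-bit minimum VLEN, a fixed v4i32 is handled
    // in an nxv2i32 container; the precise container depends on the Zvl*b
    // features, so treat this as an illustration.)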
2005 if (SubVecVT.isFixedLengthVector()) 2006 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 2007 if (VT.isFixedLengthVector()) 2008 VT = TLI.getContainerForFixedLengthVector(VT); 2009 2010 const auto *TRI = Subtarget->getRegisterInfo(); 2011 unsigned SubRegIdx; 2012 std::tie(SubRegIdx, Idx) = 2013 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2014 VT, SubVecContainerVT, Idx, TRI); 2015 2016 // If the Idx hasn't been completely eliminated then this is a subvector 2017 // insert which doesn't naturally align to a vector register. These must 2018 // be handled using instructions to manipulate the vector registers. 2019 if (Idx != 0) 2020 break; 2021 2022 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 2023 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 2024 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 2025 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 2026 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 2027 assert((!IsSubVecPartReg || V.isUndef()) && 2028 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 2029 "the subvector is smaller than a full-sized register"); 2030 2031 // If we haven't set a SubRegIdx, then we must be going between 2032 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 2033 if (SubRegIdx == RISCV::NoSubRegister) { 2034 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 2035 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2036 InRegClassID && 2037 "Unexpected subvector extraction"); 2038 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2039 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 2040 DL, VT, SubV, RC); 2041 ReplaceNode(Node, NewNode); 2042 return; 2043 } 2044 2045 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 2046 ReplaceNode(Node, Insert.getNode()); 2047 return; 2048 } 2049 case ISD::EXTRACT_SUBVECTOR: { 2050 SDValue V = Node->getOperand(0); 2051 auto Idx = Node->getConstantOperandVal(1); 2052 MVT InVT = V.getSimpleValueType(); 2053 SDLoc DL(V); 2054 2055 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2056 MVT SubVecContainerVT = VT; 2057 // Establish the correct scalable-vector types for any fixed-length type. 2058 if (VT.isFixedLengthVector()) 2059 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 2060 if (InVT.isFixedLengthVector()) 2061 InVT = TLI.getContainerForFixedLengthVector(InVT); 2062 2063 const auto *TRI = Subtarget->getRegisterInfo(); 2064 unsigned SubRegIdx; 2065 std::tie(SubRegIdx, Idx) = 2066 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2067 InVT, SubVecContainerVT, Idx, TRI); 2068 2069 // If the Idx hasn't been completely eliminated then this is a subvector 2070 // extract which doesn't naturally align to a vector register. These must 2071 // be handled using instructions to manipulate the vector registers. 2072 if (Idx != 0) 2073 break; 2074 2075 // If we haven't set a SubRegIdx, then we must be going between 2076 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
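    // (E.g. extracting an M1-sized subvector at index 0 from an M1 value:
    // both register classes match, so a COPY_TO_REGCLASS is emitted rather
    // than a subregister extract - illustration only.)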
2077     if (SubRegIdx == RISCV::NoSubRegister) {
2078       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2079       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2080                  InRegClassID &&
2081              "Unexpected subvector extraction");
2082       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2083       SDNode *NewNode =
2084           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2085       ReplaceNode(Node, NewNode);
2086       return;
2087     }
2088
2089     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2090     ReplaceNode(Node, Extract.getNode());
2091     return;
2092   }
2093   case RISCVISD::VMV_S_X_VL:
2094   case RISCVISD::VFMV_S_F_VL:
2095   case RISCVISD::VMV_V_X_VL:
2096   case RISCVISD::VFMV_V_F_VL: {
2097     // Try to match splat of a scalar load to a strided load with stride of x0.
2098     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2099                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2100     if (!Node->getOperand(0).isUndef())
2101       break;
2102     SDValue Src = Node->getOperand(1);
2103     auto *Ld = dyn_cast<LoadSDNode>(Src);
2104     // We can't fold an indexed load: its second output (the updated address)
2105     // has uses, so the load node could not be removed after folding.
2106     if (!Ld || Ld->isIndexed())
2107       break;
2108     EVT MemVT = Ld->getMemoryVT();
2109     // The memory VT should be the same size as the element type.
2110     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2111       break;
2112     if (!IsProfitableToFold(Src, Node, Node) ||
2113         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2114       break;
2115
2116     SDValue VL;
2117     if (IsScalarMove) {
2118       // We could deal with more VL if we update the VSETVLI insert pass to
2119       // avoid introducing more VSETVLI.
2120       if (!isOneConstant(Node->getOperand(2)))
2121         break;
2122       selectVLOp(Node->getOperand(2), VL);
2123     } else
2124       selectVLOp(Node->getOperand(2), VL);
2125
2126     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2127     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2128
2129     // If VL=1, then we don't need to do a strided load and can just do a
2130     // regular load.
2131     bool IsStrided = !isOneConstant(VL);
2132
2133     // Only do a strided load if the subtarget has optimized zero-stride loads.
2134     if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2135       break;
2136
2137     SmallVector<SDValue> Operands = {
2138         SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2139         Ld->getBasePtr()};
2140     if (IsStrided)
2141       Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2142     uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2143     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2144     Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2145
2146     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2147     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2148         /*IsMasked*/ false, IsStrided, /*FF*/ false,
2149         Log2SEW, static_cast<unsigned>(LMUL));
2150     MachineSDNode *Load =
2151         CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2152     // Update the chain.
2153     ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2154     // Record the mem-refs.
2155     CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2156     // Replace the splat with the vlse.
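    // That is, (splat (load p)) becomes e.g. "vlse32.v vd, (p), zero" - a
    // zero-stride load that broadcasts the scalar - or a plain unit-stride
    // load when VL is known to be 1.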
2157 ReplaceNode(Node, Load); 2158 return; 2159 } 2160 case ISD::PREFETCH: 2161 unsigned Locality = Node->getConstantOperandVal(3); 2162 if (Locality > 2) 2163 break; 2164 2165 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) { 2166 MachineMemOperand *MMO = LoadStoreMem->getMemOperand(); 2167 MMO->setFlags(MachineMemOperand::MONonTemporal); 2168 2169 int NontemporalLevel = 0; 2170 switch (Locality) { 2171 case 0: 2172 NontemporalLevel = 3; // NTL.ALL 2173 break; 2174 case 1: 2175 NontemporalLevel = 1; // NTL.PALL 2176 break; 2177 case 2: 2178 NontemporalLevel = 0; // NTL.P1 2179 break; 2180 default: 2181 llvm_unreachable("unexpected locality value."); 2182 } 2183 2184 if (NontemporalLevel & 0b1) 2185 MMO->setFlags(MONontemporalBit0); 2186 if (NontemporalLevel & 0b10) 2187 MMO->setFlags(MONontemporalBit1); 2188 } 2189 break; 2190 } 2191 2192 // Select the default instruction. 2193 SelectCode(Node); 2194 } 2195 2196 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 2197 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, 2198 std::vector<SDValue> &OutOps) { 2199 // Always produce a register and immediate operand, as expected by 2200 // RISCVAsmPrinter::PrintAsmMemoryOperand. 2201 switch (ConstraintID) { 2202 case InlineAsm::ConstraintCode::o: 2203 case InlineAsm::ConstraintCode::m: { 2204 SDValue Op0, Op1; 2205 bool Found = SelectAddrRegImm(Op, Op0, Op1); 2206 assert(Found && "SelectAddrRegImm should always succeed"); 2207 (void)Found; 2208 OutOps.push_back(Op0); 2209 OutOps.push_back(Op1); 2210 return false; 2211 } 2212 case InlineAsm::ConstraintCode::A: 2213 OutOps.push_back(Op); 2214 OutOps.push_back( 2215 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT())); 2216 return false; 2217 default: 2218 report_fatal_error("Unexpected asm memory constraint " + 2219 InlineAsm::getMemConstraintName(ConstraintID)); 2220 } 2221 2222 return true; 2223 } 2224 2225 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 2226 SDValue &Offset) { 2227 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 2228 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 2229 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 2230 return true; 2231 } 2232 2233 return false; 2234 } 2235 2236 // Select a frame index and an optional immediate offset from an ADD or OR. 2237 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 2238 SDValue &Offset) { 2239 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2240 return true; 2241 2242 if (!CurDAG->isBaseWithConstantOffset(Addr)) 2243 return false; 2244 2245 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 2246 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2247 if (isInt<12>(CVal)) { 2248 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 2249 Subtarget->getXLenVT()); 2250 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), 2251 Subtarget->getXLenVT()); 2252 return true; 2253 } 2254 } 2255 2256 return false; 2257 } 2258 2259 // Fold constant addresses. 2260 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, 2261 const MVT VT, const RISCVSubtarget *Subtarget, 2262 SDValue Addr, SDValue &Base, SDValue &Offset, 2263 bool IsPrefetch = false) { 2264 if (!isa<ConstantSDNode>(Addr)) 2265 return false; 2266 2267 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); 2268 2269 // If the constant is a simm12, we can fold the whole constant and use X0 as 2270 // the base. 
If the constant can be materialized with LUI+simm12, use LUI as 2271 // the base. We can't use generateInstSeq because it favors LUI+ADDIW. 2272 int64_t Lo12 = SignExtend64<12>(CVal); 2273 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; 2274 if (!Subtarget->is64Bit() || isInt<32>(Hi)) { 2275 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2276 return false; 2277 2278 if (Hi) { 2279 int64_t Hi20 = (Hi >> 12) & 0xfffff; 2280 Base = SDValue( 2281 CurDAG->getMachineNode(RISCV::LUI, DL, VT, 2282 CurDAG->getTargetConstant(Hi20, DL, VT)), 2283 0); 2284 } else { 2285 Base = CurDAG->getRegister(RISCV::X0, VT); 2286 } 2287 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 2288 return true; 2289 } 2290 2291 // Ask how constant materialization would handle this constant. 2292 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget); 2293 2294 // If the last instruction would be an ADDI, we can fold its immediate and 2295 // emit the rest of the sequence as the base. 2296 if (Seq.back().getOpcode() != RISCV::ADDI) 2297 return false; 2298 Lo12 = Seq.back().getImm(); 2299 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2300 return false; 2301 2302 // Drop the last instruction. 2303 Seq.pop_back(); 2304 assert(!Seq.empty() && "Expected more instructions in sequence"); 2305 2306 Base = selectImmSeq(CurDAG, DL, VT, Seq); 2307 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 2308 return true; 2309 } 2310 2311 // Is this ADD instruction only used as the base pointer of scalar loads and 2312 // stores? 2313 static bool isWorthFoldingAdd(SDValue Add) { 2314 for (auto *Use : Add->uses()) { 2315 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 2316 Use->getOpcode() != ISD::ATOMIC_LOAD && 2317 Use->getOpcode() != ISD::ATOMIC_STORE) 2318 return false; 2319 EVT VT = cast<MemSDNode>(Use)->getMemoryVT(); 2320 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && 2321 VT != MVT::f64) 2322 return false; 2323 // Don't allow stores of the value. It must be used as the address. 2324 if (Use->getOpcode() == ISD::STORE && 2325 cast<StoreSDNode>(Use)->getValue() == Add) 2326 return false; 2327 if (Use->getOpcode() == ISD::ATOMIC_STORE && 2328 cast<AtomicSDNode>(Use)->getVal() == Add) 2329 return false; 2330 } 2331 2332 return true; 2333 } 2334 2335 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, 2336 unsigned MaxShiftAmount, 2337 SDValue &Base, SDValue &Index, 2338 SDValue &Scale) { 2339 EVT VT = Addr.getSimpleValueType(); 2340 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, 2341 SDValue &Shift) { 2342 uint64_t ShiftAmt = 0; 2343 Index = N; 2344 2345 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) { 2346 // Only match shifts by a value in range [0, MaxShiftAmount]. 
2347 if (N.getConstantOperandVal(1) <= MaxShiftAmount) { 2348 Index = N.getOperand(0); 2349 ShiftAmt = N.getConstantOperandVal(1); 2350 } 2351 } 2352 2353 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT); 2354 return ShiftAmt != 0; 2355 }; 2356 2357 if (Addr.getOpcode() == ISD::ADD) { 2358 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 2359 SDValue AddrB = Addr.getOperand(0); 2360 if (AddrB.getOpcode() == ISD::ADD && 2361 UnwrapShl(AddrB.getOperand(0), Index, Scale) && 2362 !isa<ConstantSDNode>(AddrB.getOperand(1)) && 2363 isInt<12>(C1->getSExtValue())) { 2364 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) 2365 SDValue C1Val = 2366 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT); 2367 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, 2368 AddrB.getOperand(1), C1Val), 2369 0); 2370 return true; 2371 } 2372 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) { 2373 Base = Addr.getOperand(1); 2374 return true; 2375 } else { 2376 UnwrapShl(Addr.getOperand(1), Index, Scale); 2377 Base = Addr.getOperand(0); 2378 return true; 2379 } 2380 } else if (UnwrapShl(Addr, Index, Scale)) { 2381 EVT VT = Addr.getValueType(); 2382 Base = CurDAG->getRegister(RISCV::X0, VT); 2383 return true; 2384 } 2385 2386 return false; 2387 } 2388 2389 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 2390 SDValue &Offset, bool IsINX) { 2391 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2392 return true; 2393 2394 SDLoc DL(Addr); 2395 MVT VT = Addr.getSimpleValueType(); 2396 2397 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 2398 Base = Addr.getOperand(0); 2399 Offset = Addr.getOperand(1); 2400 return true; 2401 } 2402 2403 int64_t RV32ZdinxRange = IsINX ? 4 : 0; 2404 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2405 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2406 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) { 2407 Base = Addr.getOperand(0); 2408 if (Base.getOpcode() == RISCVISD::ADD_LO) { 2409 SDValue LoOperand = Base.getOperand(1); 2410 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 2411 // If the Lo in (ADD_LO hi, lo) is a global variable's address 2412 // (its low part, really), then we can rely on the alignment of that 2413 // variable to provide a margin of safety before low part can overflow 2414 // the 12 bits of the load/store offset. Check if CVal falls within 2415 // that margin; if so (low part + CVal) can't overflow. 2416 const DataLayout &DL = CurDAG->getDataLayout(); 2417 Align Alignment = commonAlignment( 2418 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 2419 if (CVal == 0 || Alignment > CVal) { 2420 int64_t CombinedOffset = CVal + GA->getOffset(); 2421 Base = Base.getOperand(0); 2422 Offset = CurDAG->getTargetGlobalAddress( 2423 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 2424 CombinedOffset, GA->getTargetFlags()); 2425 return true; 2426 } 2427 } 2428 } 2429 2430 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2431 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2432 Offset = CurDAG->getTargetConstant(CVal, DL, VT); 2433 return true; 2434 } 2435 } 2436 2437 // Handle ADD with large immediates. 
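  // (Worked example of the AddiPair split below: an offset of 3000 becomes
  // "addi tmp, base, 2047" with the remaining 953 folded into the load/store
  // immediate; both pieces fit in simm12.)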
2438   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2439     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2440     assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2441            "simm12 not already handled?");
2442
2443     // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2444     // an ADDI for part of the offset and fold the rest into the load/store.
2445     // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2446     if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2447       int64_t Adj = CVal < 0 ? -2048 : 2047;
2448       Base = SDValue(
2449           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2450                                  CurDAG->getTargetConstant(Adj, DL, VT)),
2451           0);
2452       Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2453       return true;
2454     }
2455
2456     // For larger immediates, we might be able to save one instruction from
2457     // constant materialization by folding the Lo12 bits of the immediate into
2458     // the address. We should only do this if the ADD is only used by loads and
2459     // stores that can fold the Lo12 bits. Otherwise, the ADD will get selected
2460     // separately with the full materialized immediate creating extra
2461     // instructions.
2462     if (isWorthFoldingAdd(Addr) &&
2463         selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2464                            Offset)) {
2465       // Insert an ADD instruction with the materialized Hi52 bits.
2466       Base = SDValue(
2467           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2468           0);
2469       return true;
2470     }
2471   }
2472
2473   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2474     return true;
2475
2476   Base = Addr;
2477   Offset = CurDAG->getTargetConstant(0, DL, VT);
2478   return true;
2479 }
2480
2481 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2482 /// Offset should be all zeros.
2483 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2484                                                  SDValue &Offset) {
2485   if (SelectAddrFrameIndex(Addr, Base, Offset))
2486     return true;
2487
2488   SDLoc DL(Addr);
2489   MVT VT = Addr.getSimpleValueType();
2490
2491   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2492     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2493     if (isInt<12>(CVal)) {
2494       Base = Addr.getOperand(0);
2495
2496       // Early-out if not a valid offset.
2497       if ((CVal & 0b11111) != 0) {
2498         Base = Addr;
2499         Offset = CurDAG->getTargetConstant(0, DL, VT);
2500         return true;
2501       }
2502
2503       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2504         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2505       Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2506       return true;
2507     }
2508   }
2509
2510   // Handle ADD with large immediates.
2511   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2512     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2513     assert(!isInt<12>(CVal) &&
2514            "simm12 not already handled?");
2515
2516     // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2517     // one instruction by folding adjustment (-2048 or 2016) into the address.
2518     if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2519       int64_t Adj = CVal < 0 ?
-2048 : 2016; 2520 int64_t AdjustedOffset = CVal - Adj; 2521 Base = SDValue(CurDAG->getMachineNode( 2522 RISCV::ADDI, DL, VT, Addr.getOperand(0), 2523 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)), 2524 0); 2525 Offset = CurDAG->getTargetConstant(Adj, DL, VT); 2526 return true; 2527 } 2528 2529 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 2530 Offset, true)) { 2531 // Insert an ADD instruction with the materialized Hi52 bits. 2532 Base = SDValue( 2533 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 2534 0); 2535 return true; 2536 } 2537 } 2538 2539 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true)) 2540 return true; 2541 2542 Base = Addr; 2543 Offset = CurDAG->getTargetConstant(0, DL, VT); 2544 return true; 2545 } 2546 2547 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 2548 SDValue &ShAmt) { 2549 ShAmt = N; 2550 2551 // Peek through zext. 2552 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) 2553 ShAmt = ShAmt.getOperand(0); 2554 2555 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift 2556 // amount. If there is an AND on the shift amount, we can bypass it if it 2557 // doesn't affect any of those bits. 2558 if (ShAmt.getOpcode() == ISD::AND && 2559 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2560 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1); 2561 2562 // Since the max shift amount is a power of 2 we can subtract 1 to make a 2563 // mask that covers the bits needed to represent all shift amounts. 2564 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 2565 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 2566 2567 if (ShMask.isSubsetOf(AndMask)) { 2568 ShAmt = ShAmt.getOperand(0); 2569 } else { 2570 // SimplifyDemandedBits may have optimized the mask so try restoring any 2571 // bits that are known zero. 2572 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0)); 2573 if (!ShMask.isSubsetOf(AndMask | Known.Zero)) 2574 return true; 2575 ShAmt = ShAmt.getOperand(0); 2576 } 2577 } 2578 2579 if (ShAmt.getOpcode() == ISD::ADD && 2580 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2581 uint64_t Imm = ShAmt.getConstantOperandVal(1); 2582 // If we are shifting by X+N where N == 0 mod Size, then just shift by X 2583 // to avoid the ADD. 2584 if (Imm != 0 && Imm % ShiftWidth == 0) { 2585 ShAmt = ShAmt.getOperand(0); 2586 return true; 2587 } 2588 } else if (ShAmt.getOpcode() == ISD::SUB && 2589 isa<ConstantSDNode>(ShAmt.getOperand(0))) { 2590 uint64_t Imm = ShAmt.getConstantOperandVal(0); 2591 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2592 // generate a NEG instead of a SUB of a constant. 2593 if (Imm != 0 && Imm % ShiftWidth == 0) { 2594 SDLoc DL(ShAmt); 2595 EVT VT = ShAmt.getValueType(); 2596 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 2597 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 2598 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 2599 ShAmt.getOperand(1)); 2600 ShAmt = SDValue(Neg, 0); 2601 return true; 2602 } 2603 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 2604 // to generate a NOT instead of a SUB of a constant. 
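    // (E.g. (srl x, (sub 63, y)) with ShiftWidth == 64: 63 % 64 == 63, so the
    // shift amount becomes (xori y, -1), i.e. ~y, which equals 63 - y
    // modulo 64.)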
2605 if (Imm % ShiftWidth == ShiftWidth - 1) { 2606 SDLoc DL(ShAmt); 2607 EVT VT = ShAmt.getValueType(); 2608 MachineSDNode *Not = 2609 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1), 2610 CurDAG->getTargetConstant(-1, DL, VT)); 2611 ShAmt = SDValue(Not, 0); 2612 return true; 2613 } 2614 } 2615 2616 return true; 2617 } 2618 2619 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can 2620 /// check for equality with 0. This function emits instructions that convert the 2621 /// seteq/setne into something that can be compared with 0. 2622 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g. 2623 /// ISD::SETNE). 2624 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 2625 SDValue &Val) { 2626 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && 2627 "Unexpected condition code!"); 2628 2629 // We're looking for a setcc. 2630 if (N->getOpcode() != ISD::SETCC) 2631 return false; 2632 2633 // Must be an equality comparison. 2634 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2635 if (CCVal != ExpectedCCVal) 2636 return false; 2637 2638 SDValue LHS = N->getOperand(0); 2639 SDValue RHS = N->getOperand(1); 2640 2641 if (!LHS.getValueType().isScalarInteger()) 2642 return false; 2643 2644 // If the RHS side is 0, we don't need any extra instructions, return the LHS. 2645 if (isNullConstant(RHS)) { 2646 Val = LHS; 2647 return true; 2648 } 2649 2650 SDLoc DL(N); 2651 2652 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { 2653 int64_t CVal = C->getSExtValue(); 2654 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and 2655 // non-zero otherwise. 2656 if (CVal == -2048) { 2657 Val = 2658 SDValue(CurDAG->getMachineNode( 2659 RISCV::XORI, DL, N->getValueType(0), LHS, 2660 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))), 2661 0); 2662 return true; 2663 } 2664 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the 2665 // LHS is equal to the RHS and non-zero otherwise. 2666 if (isInt<12>(CVal) || CVal == 2048) { 2667 Val = 2668 SDValue(CurDAG->getMachineNode( 2669 RISCV::ADDI, DL, N->getValueType(0), LHS, 2670 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))), 2671 0); 2672 return true; 2673 } 2674 } 2675 2676 // If nothing else we can XOR the LHS and RHS to produce zero if they are 2677 // equal and a non-zero value if they aren't. 
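  // The pattern that called this then reduces the XOR result against zero,
  // e.g. a seteq typically ends up as "xor a0, a0, a1; seqz a0, a0" (a sketch
  // of the intended use; the exact consumer is the matching pattern).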
2678   Val = SDValue(
2679       CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2680   return true;
2681 }
2682
2683 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2684   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2685       cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2686     Val = N.getOperand(0);
2687     return true;
2688   }
2689
2690   auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2691     if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2692       return N;
2693
2694     SDValue N0 = N.getOperand(0);
2695     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2696         N.getConstantOperandVal(1) == ShiftAmt &&
2697         N0.getConstantOperandVal(1) == ShiftAmt)
2698       return N0.getOperand(0);
2699
2700     return N;
2701   };
2702
2703   MVT VT = N.getSimpleValueType();
2704   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2705     Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2706     return true;
2707   }
2708
2709   return false;
2710 }
2711
2712 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2713   if (N.getOpcode() == ISD::AND) {
2714     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2715     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2716       Val = N.getOperand(0);
2717       return true;
2718     }
2719   }
2720   MVT VT = N.getSimpleValueType();
2721   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2722   if (CurDAG->MaskedValueIsZero(N, Mask)) {
2723     Val = N;
2724     return true;
2725   }
2726
2727   return false;
2728 }
2729
2730 /// Look for various patterns that can be done with a SHL that can be folded
2731 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2732 /// SHXADD we are trying to match.
2733 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2734                                        SDValue &Val) {
2735   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2736     SDValue N0 = N.getOperand(0);
2737
2738     bool LeftShift = N0.getOpcode() == ISD::SHL;
2739     if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2740         isa<ConstantSDNode>(N0.getOperand(1))) {
2741       uint64_t Mask = N.getConstantOperandVal(1);
2742       unsigned C2 = N0.getConstantOperandVal(1);
2743
2744       unsigned XLen = Subtarget->getXLen();
2745       if (LeftShift)
2746         Mask &= maskTrailingZeros<uint64_t>(C2);
2747       else
2748         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2749
2750       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2751       // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2752       // followed by a SHXADD with c3 for the X amount.
2753       if (isShiftedMask_64(Mask)) {
2754         unsigned Leading = XLen - llvm::bit_width(Mask);
2755         unsigned Trailing = llvm::countr_zero(Mask);
2756         if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2757           SDLoc DL(N);
2758           EVT VT = N.getValueType();
2759           Val = SDValue(CurDAG->getMachineNode(
2760                             RISCV::SRLI, DL, VT, N0.getOperand(0),
2761                             CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2762                         0);
2763           return true;
2764         }
2765         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2766         // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2767         // followed by a SHXADD using c3 for the X amount.
2768 if (!LeftShift && Leading == C2 && Trailing == ShAmt) { 2769 SDLoc DL(N); 2770 EVT VT = N.getValueType(); 2771 Val = SDValue( 2772 CurDAG->getMachineNode( 2773 RISCV::SRLI, DL, VT, N0.getOperand(0), 2774 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), 2775 0); 2776 return true; 2777 } 2778 } 2779 } 2780 } 2781 2782 bool LeftShift = N.getOpcode() == ISD::SHL; 2783 if ((LeftShift || N.getOpcode() == ISD::SRL) && 2784 isa<ConstantSDNode>(N.getOperand(1))) { 2785 SDValue N0 = N.getOperand(0); 2786 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 2787 isa<ConstantSDNode>(N0.getOperand(1))) { 2788 uint64_t Mask = N0.getConstantOperandVal(1); 2789 if (isShiftedMask_64(Mask)) { 2790 unsigned C1 = N.getConstantOperandVal(1); 2791 unsigned XLen = Subtarget->getXLen(); 2792 unsigned Leading = XLen - llvm::bit_width(Mask); 2793 unsigned Trailing = llvm::countr_zero(Mask); 2794 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 2795 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 2796 if (LeftShift && Leading == 32 && Trailing > 0 && 2797 (Trailing + C1) == ShAmt) { 2798 SDLoc DL(N); 2799 EVT VT = N.getValueType(); 2800 Val = SDValue(CurDAG->getMachineNode( 2801 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2802 CurDAG->getTargetConstant(Trailing, DL, VT)), 2803 0); 2804 return true; 2805 } 2806 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 2807 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 2808 if (!LeftShift && Leading == 32 && Trailing > C1 && 2809 (Trailing - C1) == ShAmt) { 2810 SDLoc DL(N); 2811 EVT VT = N.getValueType(); 2812 Val = SDValue(CurDAG->getMachineNode( 2813 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2814 CurDAG->getTargetConstant(Trailing, DL, VT)), 2815 0); 2816 return true; 2817 } 2818 } 2819 } 2820 } 2821 2822 return false; 2823 } 2824 2825 /// Look for various patterns that can be done with a SHL that can be folded 2826 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which 2827 /// SHXADD_UW we are trying to match. 2828 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, 2829 SDValue &Val) { 2830 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) && 2831 N.hasOneUse()) { 2832 SDValue N0 = N.getOperand(0); 2833 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 2834 N0.hasOneUse()) { 2835 uint64_t Mask = N.getConstantOperandVal(1); 2836 unsigned C2 = N0.getConstantOperandVal(1); 2837 2838 Mask &= maskTrailingZeros<uint64_t>(C2); 2839 2840 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 2841 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by 2842 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. 
2843 if (isShiftedMask_64(Mask)) { 2844 unsigned Leading = llvm::countl_zero(Mask); 2845 unsigned Trailing = llvm::countr_zero(Mask); 2846 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { 2847 SDLoc DL(N); 2848 EVT VT = N.getValueType(); 2849 Val = SDValue(CurDAG->getMachineNode( 2850 RISCV::SLLI, DL, VT, N0.getOperand(0), 2851 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)), 2852 0); 2853 return true; 2854 } 2855 } 2856 } 2857 } 2858 2859 return false; 2860 } 2861 2862 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, 2863 unsigned Bits, 2864 const TargetInstrInfo *TII) { 2865 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode()); 2866 2867 if (!MCOpcode) 2868 return false; 2869 2870 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode()); 2871 const uint64_t TSFlags = MCID.TSFlags; 2872 if (!RISCVII::hasSEWOp(TSFlags)) 2873 return false; 2874 assert(RISCVII::hasVLOp(TSFlags)); 2875 2876 bool HasGlueOp = User->getGluedNode() != nullptr; 2877 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1; 2878 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other; 2879 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); 2880 unsigned VLIdx = 2881 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; 2882 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1); 2883 2884 if (UserOpNo == VLIdx) 2885 return false; 2886 2887 auto NumDemandedBits = 2888 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW); 2889 return NumDemandedBits && Bits >= *NumDemandedBits; 2890 } 2891 2892 // Return true if all users of this SDNode* only consume the lower \p Bits. 2893 // This can be used to form W instructions for add/sub/mul/shl even when the 2894 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 2895 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 2896 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 2897 // the add/sub/mul/shl to become non-W instructions. By checking the users we 2898 // may be able to use a W instruction and CSE with the other instruction if 2899 // this has happened. We could try to detect that the CSE opportunity exists 2900 // before doing this, but that would be more complicated. 2901 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, 2902 const unsigned Depth) const { 2903 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 2904 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 2905 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || 2906 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || 2907 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 2908 isa<ConstantSDNode>(Node) || Depth != 0) && 2909 "Unexpected opcode"); 2910 2911 if (Depth >= SelectionDAG::MaxRecursionDepth) 2912 return false; 2913 2914 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked 2915 // the VT. Ensure the type is scalar to avoid wasting time on vectors. 2916 if (Depth == 0 && !Node->getValueType(0).isScalarInteger()) 2917 return false; 2918 2919 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 2920 SDNode *User = *UI; 2921 // Users of this node should have already been instruction selected 2922 if (!User->isMachineOpcode()) 2923 return false; 2924 2925 // TODO: Add more opcodes? 
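  // Example of the intent: if (mul x, y) is consumed only by an ADDW and an
  // SW, every user reads at most the low 32 bits, so the mul itself can be
  // selected as MULW without changing any observable result.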
2926 switch (User->getMachineOpcode()) { 2927 default: 2928 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII)) 2929 break; 2930 return false; 2931 case RISCV::ADDW: 2932 case RISCV::ADDIW: 2933 case RISCV::SUBW: 2934 case RISCV::MULW: 2935 case RISCV::SLLW: 2936 case RISCV::SLLIW: 2937 case RISCV::SRAW: 2938 case RISCV::SRAIW: 2939 case RISCV::SRLW: 2940 case RISCV::SRLIW: 2941 case RISCV::DIVW: 2942 case RISCV::DIVUW: 2943 case RISCV::REMW: 2944 case RISCV::REMUW: 2945 case RISCV::ROLW: 2946 case RISCV::RORW: 2947 case RISCV::RORIW: 2948 case RISCV::CLZW: 2949 case RISCV::CTZW: 2950 case RISCV::CPOPW: 2951 case RISCV::SLLI_UW: 2952 case RISCV::FMV_W_X: 2953 case RISCV::FCVT_H_W: 2954 case RISCV::FCVT_H_WU: 2955 case RISCV::FCVT_S_W: 2956 case RISCV::FCVT_S_WU: 2957 case RISCV::FCVT_D_W: 2958 case RISCV::FCVT_D_WU: 2959 case RISCV::TH_REVW: 2960 case RISCV::TH_SRRIW: 2961 if (Bits < 32) 2962 return false; 2963 break; 2964 case RISCV::SLL: 2965 case RISCV::SRA: 2966 case RISCV::SRL: 2967 case RISCV::ROL: 2968 case RISCV::ROR: 2969 case RISCV::BSET: 2970 case RISCV::BCLR: 2971 case RISCV::BINV: 2972 // Shift amount operands only use log2(Xlen) bits. 2973 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen())) 2974 return false; 2975 break; 2976 case RISCV::SLLI: 2977 // SLLI only uses the lower (XLen - ShAmt) bits. 2978 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1)) 2979 return false; 2980 break; 2981 case RISCV::ANDI: 2982 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1))) 2983 break; 2984 goto RecCheck; 2985 case RISCV::ORI: { 2986 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue(); 2987 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm)) 2988 break; 2989 [[fallthrough]]; 2990 } 2991 case RISCV::AND: 2992 case RISCV::OR: 2993 case RISCV::XOR: 2994 case RISCV::XORI: 2995 case RISCV::ANDN: 2996 case RISCV::ORN: 2997 case RISCV::XNOR: 2998 case RISCV::SH1ADD: 2999 case RISCV::SH2ADD: 3000 case RISCV::SH3ADD: 3001 RecCheck: 3002 if (hasAllNBitUsers(User, Bits, Depth + 1)) 3003 break; 3004 return false; 3005 case RISCV::SRLI: { 3006 unsigned ShAmt = User->getConstantOperandVal(1); 3007 // If we are shifting right by less than Bits, and users don't demand any 3008 // bits that were shifted into [Bits-1:0], then we can consider this as an 3009 // N-Bit user. 3010 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1)) 3011 break; 3012 return false; 3013 } 3014 case RISCV::SEXT_B: 3015 case RISCV::PACKH: 3016 if (Bits < 8) 3017 return false; 3018 break; 3019 case RISCV::SEXT_H: 3020 case RISCV::FMV_H_X: 3021 case RISCV::ZEXT_H_RV32: 3022 case RISCV::ZEXT_H_RV64: 3023 case RISCV::PACKW: 3024 if (Bits < 16) 3025 return false; 3026 break; 3027 case RISCV::PACK: 3028 if (Bits < (Subtarget->getXLen() / 2)) 3029 return false; 3030 break; 3031 case RISCV::ADD_UW: 3032 case RISCV::SH1ADD_UW: 3033 case RISCV::SH2ADD_UW: 3034 case RISCV::SH3ADD_UW: 3035 // The first operand to add.uw/shXadd.uw is implicitly zero extended from 3036 // 32 bits. 
3037     if (UI.getOperandNo() != 0 || Bits < 32)
3038       return false;
3039     break;
3040   case RISCV::SB:
3041     if (UI.getOperandNo() != 0 || Bits < 8)
3042       return false;
3043     break;
3044   case RISCV::SH:
3045     if (UI.getOperandNo() != 0 || Bits < 16)
3046       return false;
3047     break;
3048   case RISCV::SW:
3049     if (UI.getOperandNo() != 0 || Bits < 32)
3050       return false;
3051     break;
3052     }
3053   }
3054
3055   return true;
3056 }
3057
3058 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
3059 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3060                                         SDValue &Shl2) {
3061   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3062     int64_t Offset = C->getSExtValue();
3063     int64_t Shift;
3064     for (Shift = 0; Shift < 4; Shift++)
3065       if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3066         break;
3067
3068     // Constant cannot be encoded.
3069     if (Shift == 4)
3070       return false;
3071
3072     EVT Ty = N->getValueType(0);
3073     Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3074     Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3075     return true;
3076   }
3077
3078   return false;
3079 }
3080
3081 // Select VL as a 5-bit immediate or a value that will become a register. This
3082 // allows us to choose between VSETIVLI or VSETVLI later.
3083 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3084   auto *C = dyn_cast<ConstantSDNode>(N);
3085   if (C && isUInt<5>(C->getZExtValue())) {
3086     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3087                                    N->getValueType(0));
3088   } else if (C && C->isAllOnes()) {
3089     // Treat all ones as VLMax.
3090     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3091                                    N->getValueType(0));
3092   } else if (isa<RegisterSDNode>(N) &&
3093              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3094     // All our VL operands use an operand class that allows GPRNoX0 or an
3095     // immediate. Convert X0 to a special immediate to pass the
3096     // MachineVerifier. This is recognized specially by the vsetvli insertion
3097     // pass.
3098 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 3099 N->getValueType(0)); 3100 } else { 3101 VL = N; 3102 } 3103 3104 return true; 3105 } 3106 3107 static SDValue findVSplat(SDValue N) { 3108 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) { 3109 if (!N.getOperand(0).isUndef()) 3110 return SDValue(); 3111 N = N.getOperand(1); 3112 } 3113 SDValue Splat = N; 3114 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL && 3115 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) || 3116 !Splat.getOperand(0).isUndef()) 3117 return SDValue(); 3118 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands"); 3119 return Splat; 3120 } 3121 3122 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 3123 SDValue Splat = findVSplat(N); 3124 if (!Splat) 3125 return false; 3126 3127 SplatVal = Splat.getOperand(1); 3128 return true; 3129 } 3130 3131 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, 3132 SelectionDAG &DAG, 3133 const RISCVSubtarget &Subtarget, 3134 std::function<bool(int64_t)> ValidateImm) { 3135 SDValue Splat = findVSplat(N); 3136 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1))) 3137 return false; 3138 3139 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits(); 3140 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() && 3141 "Unexpected splat operand type"); 3142 3143 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 3144 // type is wider than the resulting vector element type: an implicit 3145 // truncation first takes place. Therefore, perform a manual 3146 // truncation/sign-extension in order to ignore any truncated bits and catch 3147 // any zero-extended immediate. 3148 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 3149 // sign-extending to (XLenVT -1). 3150 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize); 3151 3152 int64_t SplatImm = SplatConst.getSExtValue(); 3153 3154 if (!ValidateImm(SplatImm)) 3155 return false; 3156 3157 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT()); 3158 return true; 3159 } 3160 3161 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 3162 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget, 3163 [](int64_t Imm) { return isInt<5>(Imm); }); 3164 } 3165 3166 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 3167 return selectVSplatImmHelper( 3168 N, SplatVal, *CurDAG, *Subtarget, 3169 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 3170 } 3171 3172 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 3173 SDValue &SplatVal) { 3174 return selectVSplatImmHelper( 3175 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 3176 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 3177 }); 3178 } 3179 3180 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, 3181 SDValue &SplatVal) { 3182 return selectVSplatImmHelper( 3183 N, SplatVal, *CurDAG, *Subtarget, 3184 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); }); 3185 } 3186 3187 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) { 3188 // Truncates are custom lowered during legalization. 3189 auto IsTrunc = [this](SDValue N) { 3190 if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) 3191 return false; 3192 SDValue VL; 3193 selectVLOp(N->getOperand(2), VL); 3194 // Any vmset_vl is ok, since any bits past VL are undefined and we can 3195 // assume they are set. 

bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
                                         SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}

bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
  // Truncates are custom lowered during legalization.
  auto IsTrunc = [this](SDValue N) {
    if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
      return false;
    SDValue VL;
    selectVLOp(N->getOperand(2), VL);
    // Any vmset_vl is ok, since any bits past VL are undefined and we can
    // assume they are set.
    return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
           isa<ConstantSDNode>(VL) &&
           cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
  };

  // We can have multiple nested truncates, so unravel them all if needed.
  while (N->getOpcode() == ISD::SIGN_EXTEND ||
         N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
    if (!N.hasOneUse() ||
        N.getValueType().getSizeInBits().getKnownMinValue() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  // Even if this FPImm requires an additional FNEG (i.e. the second element of
  // the returned pair is true) we still prefer FLI + FNEG over immediate
  // materialization as the latter might generate a longer instruction
  // sequence.
  if (static_cast<const RISCVTargetLowering *>(TLI)
          ->getLegalZfaFPImm(APF, VT)
          .first >= 0)
    return false;

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }
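
    // For example, (ADDIW (ADD x, y), 0) becomes (ADDW x, y): the W-suffixed
    // instruction already sign-extends its 32-bit result, so the outer
    // sext.w is redundant.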

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // The result is already sign extended, so just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(MaskOp) ||
      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
    return false;

  // The glued user defines V0.
  const auto *Glued = GlueOp.getNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return false;

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return false;

  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = Glued->getOperand(2);

  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask
  // came from an extract_subvector or insert_subvector.
  if (MaskSetter->isMachineOpcode() &&
      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
    MaskSetter = MaskSetter->getOperand(0);

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskSetter->isMachineOpcode() &&
         IsVMSet(MaskSetter.getMachineOpcode());
}

// Return true if we can prove that the mask of N is an all-ones mask.
static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
  return usesAllOnesMask(N->getOperand(MaskOpIdx),
                         N->getOperand(N->getNumOperands() - 1));
}

static bool isImplicitDef(SDValue V) {
  return V.isMachineOpcode() &&
         V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
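// For example, a PseudoVADD_VV_M1_MASK whose V0 operand is defined by a
// PseudoVMSET_M_B8 can be rewritten as a plain PseudoVADD_VV_M1, dropping
// the mask and glue operands.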
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N, MaskOpIdx))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
         "Masked and unmasked pseudos are inconsistent");
  const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
  assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
#endif

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if !UseTUPseudo.
  for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask and the glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node. Glued is known to be
  // non-null here, since usesAllOnesMask verified the glued CopyToReg that
  // defines V0.
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

static bool IsVMerge(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}

static bool IsVMv(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}

static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  case RISCVII::LMUL_F8:
    return RISCV::PseudoVMSET_M_B1;
  case RISCVII::LMUL_F4:
    return RISCV::PseudoVMSET_M_B2;
  case RISCVII::LMUL_F2:
    return RISCV::PseudoVMSET_M_B4;
  case RISCVII::LMUL_1:
    return RISCV::PseudoVMSET_M_B8;
  case RISCVII::LMUL_2:
    return RISCV::PseudoVMSET_M_B16;
  case RISCVII::LMUL_4:
    return RISCV::PseudoVMSET_M_B32;
  case RISCVII::LMUL_8:
    return RISCV::PseudoVMSET_M_B64;
  case RISCVII::LMUL_RESERVED:
    llvm_unreachable("Unexpected LMUL");
  }
  llvm_unreachable("Unknown VLMUL enum");
}
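
// Note: GetVMSetForLMul picks the mask register class as if SEW were 8,
// which for a given LMUL is the largest element count the operation could
// need; since VMSET writes all ones, covering more mask bits than necessary
// is harmless.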

// Try to fold away VMERGE_VVM instructions. We handle these cases:
// - A masked TU VMERGE_VVM combined with an unmasked TA instruction folds to
//   a masked TU instruction. The VMERGE_VVM's merge operand must equal its
//   false operand.
// - A masked TA VMERGE_VVM combined with an unmasked TA instruction folds to
//   a masked TA instruction.
// - An unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds
//   to a masked TU instruction. Both instructions must have the same merge
//   operand, and the VMERGE_VVM's merge operand must equal its false operand.
// Note: The VMERGE_VVM forms above (TA and TU) refer to the policy implied,
// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo
// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
// form.
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  SDValue Merge, False, True, VL, Mask, Glue;
  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
  if (IsVMv(N)) {
    Merge = N->getOperand(0);
    False = N->getOperand(0);
    True = N->getOperand(1);
    VL = N->getOperand(2);
    // A vmv.v.v won't have a Mask or Glue; instead we'll construct an
    // all-ones mask later below.
  } else {
    assert(IsVMerge(N));
    Merge = N->getOperand(0);
    False = N->getOperand(1);
    True = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    // We always have a glue node for the mask at v0.
    Glue = N->getOperand(N->getNumOperands() - 1);
  }
  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
  assert(!Glue || Glue.getValueType() == MVT::Glue);

  // We require that either merge and false are the same, or that merge
  // is undefined.
  if (Merge != False && !isImplicitDef(Merge))
    return false;

  assert(True.getResNo() == 0 &&
         "Expect True to be the first output of an instruction.");

  // N must be the only user of True.
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();
  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
  uint64_t TrueTSFlags = TrueMCID.TSFlags;
  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

  bool IsMasked = false;
  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
  if (!Info && HasTiedDest) {
    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
    IsMasked = true;
  }

  if (!Info)
    return false;

  // When Mask is not a true mask, this transformation is illegal for some
  // operations whose results are affected by mask, like viota.m.
  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
    return false;

  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
    // The vmerge instruction must be TU.
    // FIXME: This could be relaxed, but we need to handle the policy for the
    // resulting op correctly.
    if (isImplicitDef(Merge))
      return false;
    SDValue MergeOpTrue = True->getOperand(0);
    // Both the vmerge instruction and the True instruction must have the same
    // merge operand.
    if (False != MergeOpTrue)
      return false;
  }

  if (IsMasked) {
    assert(HasTiedDest && "Expected tied dest");
    // The vmerge instruction must be TU.
    if (isImplicitDef(Merge))
      return false;
    // The vmerge instruction must have an all-ones mask since we're going to
    // keep the mask from the True instruction.
    // FIXME: Support mask agnostic True instructions, which would have an
    // undef merge operand.
    if (Mask && !usesAllOnesMask(Mask, Glue))
      return false;
  }

  // Skip if True has side effects.
  // TODO: Support vleff and vlsegff.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // The last operand of a masked instruction may be glued.
  bool HasGlueOp = True->getGluedNode() != nullptr;

  // The chain operand may exist either before the glued operands or in the
  // last position.
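  // That is, the operand list of True is laid out as
  //   (tied dest?, ..., vl, sew, policy?, chain?, glue?)
  // so with a trailing glue the chain sits immediately before it; otherwise
  // the chain, if present, is the last operand.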
  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
  bool HasChainOp =
      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its Chain.
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    if (Mask)
      LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    if (Glue)
      LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // The vector policy operand may be present for masked intrinsics.
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
  unsigned TrueVLIndex =
      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);
  SDValue SEW = True.getOperand(TrueVLIndex + 1);

  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
    if (LHS == RHS)
      return LHS;
    if (isAllOnesConstant(LHS))
      return RHS;
    if (isAllOnesConstant(RHS))
      return LHS;
    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!CLHS || !CRHS)
      return SDValue();
    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
  };

  // Because N and True must have the same merge operand (or True's operand is
  // implicit_def), the "effective" body is the minimum of their VLs.
  SDValue OrigVL = VL;
  VL = GetMinVL(TrueVL, VL);
  if (!VL)
    return false;

  // If we end up changing the VL or mask of True, then we need to make sure it
  // doesn't raise any observable fp exceptions, since changing the active
  // elements will affect how fflags is set.
  if (TrueVL != VL || !IsMasked)
    if (mayRaiseFPException(True.getNode()) &&
        !True->getFlags().hasNoFPExcept())
      return false;

  SDLoc DL(N);

  // From the preconditions we checked above, we know the mask and thus glue
  // for the result node will be taken from True.
  if (IsMasked) {
    Mask = True->getOperand(Info->MaskOpIdx);
    Glue = True->getOperand(True->getNumOperands() - 1);
    assert(Glue.getValueType() == MVT::Glue);
  }
  // If we end up using the vmerge's mask and the vmerge is actually a
  // vmv.v.v, create an all-ones mask to use.
  else if (IsVMv(N)) {
    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
    ElementCount EC = N->getValueType(0).getVectorElementCount();
    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);

    SDValue AllOnesMask =
        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                            RISCV::V0, AllOnesMask, SDValue());
    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
    Glue = MaskCopy.getValue(1);
  }
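
  // Note: the mask reaches the masked pseudo the same way usesAllOnesMask
  // expects to find it: as a read of the physical register V0, glued to the
  // CopyToReg that defines V0 from the freshly created VMSET.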

  unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
         "Expected instructions with a mask to have a policy operand.");
  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
                                         MCOI::TIED_TO) == 0 &&
         "Expected instructions with a mask to have a tied dest.");
#endif

  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
  // operand is undefined.
  //
  // However, if the VL became smaller than what the vmerge had originally,
  // then elements past VL that were previously in the vmerge's body will have
  // moved to the tail. In that case we always need to use tail undisturbed to
  // preserve them.
  bool MergeVLShrunk = VL != OrigVL;
  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
                        ? RISCVII::TAIL_AGNOSTIC
                        : /*TUMU*/ 0;
  SDValue PolicyOp =
      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);

  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

  Ops.push_back(Mask);

  // For an unmasked "VOp" with a rounding mode operand, the operands are
  // (..., rm, vl) or (..., rm, vl, policy); its masked version is
  // (..., vm, rm, vl, policy). See the rounding mode pseudo nodes under
  // RISCVInstrInfoVPseudos.td.
  if (HasRoundingMode)
    Ops.push_back(True->getOperand(TrueVLIndex - 1));

  Ops.append({VL, SEW, PolicyOp});

  // The result node should take over the chain operand of True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(TrueChainOpIdx));

  // Add the glue for the CopyToReg of mask->v0.
  Ops.push_back(Glue);

  MachineSDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  if (!cast<MachineSDNode>(True)->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());

  // Replace the vmerge.vvm node with Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
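
  // Note: Result reuses True's VTList, so its secondary results (e.g. the
  // chain and glue values) line up index-for-index with True's, which the
  // replacement loop below relies on.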
  // Replace the other results of True, e.g. its chain and glue results.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
  }
  return MadeChange;
}

/// If our passthru is an implicit_def, use noreg instead. This side
/// steps issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISEL DAG to DAG.
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISC-V-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISel(TM, OptLevel);
}

char RISCVDAGToDAGISel::ID = 0;

INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)