1 //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file describes how to lower LLVM code to machine code. This has two 11 /// main components: 12 /// 13 /// 1. Which ValueTypes are natively supported by the target. 14 /// 2. Which operations are supported for supported ValueTypes. 15 /// 3. Cost thresholds for alternative implementations of certain operations. 16 /// 17 /// In addition it has a few other components, like information about FP 18 /// immediates. 19 /// 20 //===----------------------------------------------------------------------===// 21 22 #ifndef LLVM_CODEGEN_TARGETLOWERING_H 23 #define LLVM_CODEGEN_TARGETLOWERING_H 24 25 #include "llvm/ADT/APInt.h" 26 #include "llvm/ADT/ArrayRef.h" 27 #include "llvm/ADT/DenseMap.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/ADT/SmallVector.h" 30 #include "llvm/ADT/StringRef.h" 31 #include "llvm/CodeGen/DAGCombine.h" 32 #include "llvm/CodeGen/ISDOpcodes.h" 33 #include "llvm/CodeGen/LowLevelType.h" 34 #include "llvm/CodeGen/RuntimeLibcalls.h" 35 #include "llvm/CodeGen/SelectionDAG.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/TargetCallingConv.h" 38 #include "llvm/CodeGen/ValueTypes.h" 39 #include "llvm/IR/Attributes.h" 40 #include "llvm/IR/CallingConv.h" 41 #include "llvm/IR/DataLayout.h" 42 #include "llvm/IR/DerivedTypes.h" 43 #include "llvm/IR/Function.h" 44 #include "llvm/IR/InlineAsm.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/Type.h" 48 #include "llvm/Support/Alignment.h" 49 #include "llvm/Support/AtomicOrdering.h" 50 #include "llvm/Support/Casting.h" 51 #include "llvm/Support/ErrorHandling.h" 52 #include "llvm/Support/InstructionCost.h" 53 #include "llvm/Support/MachineValueType.h" 54 #include <algorithm> 55 #include <cassert> 56 #include <climits> 57 #include <cstdint> 58 #include <iterator> 59 #include <map> 60 #include <string> 61 #include <utility> 62 #include <vector> 63 64 namespace llvm { 65 66 class BranchProbability; 67 class CCState; 68 class CCValAssign; 69 class Constant; 70 class FastISel; 71 class FunctionLoweringInfo; 72 class GlobalValue; 73 class GISelKnownBits; 74 class IntrinsicInst; 75 class IRBuilderBase; 76 struct KnownBits; 77 class LegacyDivergenceAnalysis; 78 class LLVMContext; 79 class MachineBasicBlock; 80 class MachineFunction; 81 class MachineInstr; 82 class MachineJumpTableInfo; 83 class MachineLoop; 84 class MachineRegisterInfo; 85 class MCContext; 86 class MCExpr; 87 class Module; 88 class ProfileSummaryInfo; 89 class TargetLibraryInfo; 90 class TargetMachine; 91 class TargetRegisterClass; 92 class TargetRegisterInfo; 93 class TargetTransformInfo; 94 class Value; 95 96 namespace Sched { 97 98 enum Preference { 99 None, // No preference 100 Source, // Follow source order. 101 RegPressure, // Scheduling for lowest register pressure. 102 Hybrid, // Scheduling for both latency and register pressure. 103 ILP, // Scheduling for ILP in low register pressure mode. 104 VLIW, // Scheduling for VLIW targets. 105 Fast, // Fast suboptimal list scheduling 106 Linearize // Linearize DAG, no scheduling 107 }; 108 109 } // end namespace Sched 110 111 // MemOp models a memory operation, either memset or memcpy/memmove. 112 struct MemOp { 113 private: 114 // Shared 115 uint64_t Size; 116 bool DstAlignCanChange; // true if destination alignment can satisfy any 117 // constraint. 118 Align DstAlign; // Specified alignment of the memory operation. 119 120 bool AllowOverlap; 121 // memset only 122 bool IsMemset; // If setthis memory operation is a memset. 123 bool ZeroMemset; // If set clears out memory with zeros. 124 // memcpy only 125 bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register 126 // constant so it does not need to be loaded. 127 Align SrcAlign; // Inferred alignment of the source or default value if the 128 // memory operation does not need to load the value. 129 public: 130 static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, 131 Align SrcAlign, bool IsVolatile, 132 bool MemcpyStrSrc = false) { 133 MemOp Op; 134 Op.Size = Size; 135 Op.DstAlignCanChange = DstAlignCanChange; 136 Op.DstAlign = DstAlign; 137 Op.AllowOverlap = !IsVolatile; 138 Op.IsMemset = false; 139 Op.ZeroMemset = false; 140 Op.MemcpyStrSrc = MemcpyStrSrc; 141 Op.SrcAlign = SrcAlign; 142 return Op; 143 } 144 SetMemOp145 static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, 146 bool IsZeroMemset, bool IsVolatile) { 147 MemOp Op; 148 Op.Size = Size; 149 Op.DstAlignCanChange = DstAlignCanChange; 150 Op.DstAlign = DstAlign; 151 Op.AllowOverlap = !IsVolatile; 152 Op.IsMemset = true; 153 Op.ZeroMemset = IsZeroMemset; 154 Op.MemcpyStrSrc = false; 155 return Op; 156 } 157 sizeMemOp158 uint64_t size() const { return Size; } getDstAlignMemOp159 Align getDstAlign() const { 160 assert(!DstAlignCanChange); 161 return DstAlign; 162 } isFixedDstAlignMemOp163 bool isFixedDstAlign() const { return !DstAlignCanChange; } allowOverlapMemOp164 bool allowOverlap() const { return AllowOverlap; } isMemsetMemOp165 bool isMemset() const { return IsMemset; } isMemcpyMemOp166 bool isMemcpy() const { return !IsMemset; } isMemcpyWithFixedDstAlignMemOp167 bool isMemcpyWithFixedDstAlign() const { 168 return isMemcpy() && !DstAlignCanChange; 169 } isZeroMemsetMemOp170 bool isZeroMemset() const { return isMemset() && ZeroMemset; } isMemcpyStrSrcMemOp171 bool isMemcpyStrSrc() const { 172 assert(isMemcpy() && "Must be a memcpy"); 173 return MemcpyStrSrc; 174 } getSrcAlignMemOp175 Align getSrcAlign() const { 176 assert(isMemcpy() && "Must be a memcpy"); 177 return SrcAlign; 178 } isSrcAlignedMemOp179 bool isSrcAligned(Align AlignCheck) const { 180 return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value()); 181 } isDstAlignedMemOp182 bool isDstAligned(Align AlignCheck) const { 183 return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value()); 184 } isAlignedMemOp185 bool isAligned(Align AlignCheck) const { 186 return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck); 187 } 188 }; 189 190 /// This base class for TargetLowering contains the SelectionDAG-independent 191 /// parts that can be used from the rest of CodeGen. 192 class TargetLoweringBase { 193 public: 194 /// This enum indicates whether operations are valid for a target, and if not, 195 /// what action should be used to make them valid. 196 enum LegalizeAction : uint8_t { 197 Legal, // The target natively supports this operation. 198 Promote, // This operation should be executed in a larger type. 199 Expand, // Try to expand this to other ops, otherwise use a libcall. 200 LibCall, // Don't try to expand this to other ops, always use a libcall. 201 Custom // Use the LowerOperation hook to implement custom lowering. 202 }; 203 204 /// This enum indicates whether a types are legal for a target, and if not, 205 /// what action should be used to make them valid. 206 enum LegalizeTypeAction : uint8_t { 207 TypeLegal, // The target natively supports this type. 208 TypePromoteInteger, // Replace this integer with a larger one. 209 TypeExpandInteger, // Split this integer into two of half the size. 210 TypeSoftenFloat, // Convert this float to a same size integer type. 211 TypeExpandFloat, // Split this float into two of half the size. 212 TypeScalarizeVector, // Replace this one-element vector with its element. 213 TypeSplitVector, // Split this vector into two of half the size. 214 TypeWidenVector, // This vector should be widened into a larger vector. 215 TypePromoteFloat, // Replace this float with a larger one. 216 TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic. 217 TypeScalarizeScalableVector, // This action is explicitly left unimplemented. 218 // While it is theoretically possible to 219 // legalize operations on scalable types with a 220 // loop that handles the vscale * #lanes of the 221 // vector, this is non-trivial at SelectionDAG 222 // level and these types are better to be 223 // widened or promoted. 224 }; 225 226 /// LegalizeKind holds the legalization kind that needs to happen to EVT 227 /// in order to type-legalize it. 228 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; 229 230 /// Enum that describes how the target represents true/false values. 231 enum BooleanContent { 232 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. 233 ZeroOrOneBooleanContent, // All bits zero except for bit 0. 234 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. 235 }; 236 237 /// Enum that describes what type of support for selects the target has. 238 enum SelectSupportKind { 239 ScalarValSelect, // The target supports scalar selects (ex: cmov). 240 ScalarCondVectorVal, // The target supports selects with a scalar condition 241 // and vector values (ex: cmov). 242 VectorMaskSelect // The target supports vector selects with a vector 243 // mask (ex: x86 blends). 244 }; 245 246 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded 247 /// to, if at all. Exists because different targets have different levels of 248 /// support for these atomic instructions, and also have different options 249 /// w.r.t. what they should expand to. 250 enum class AtomicExpansionKind { 251 None, // Don't expand the instruction. 252 LLSC, // Expand the instruction into loadlinked/storeconditional; used 253 // by ARM/AArch64. 254 LLOnly, // Expand the (load) instruction into just a load-linked, which has 255 // greater atomic guarantees than a normal load. 256 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. 257 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. 258 }; 259 260 /// Enum that specifies when a multiplication should be expanded. 261 enum class MulExpansionKind { 262 Always, // Always expand the instruction. 263 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal 264 // or custom. 265 }; 266 267 /// Enum that specifies when a float negation is beneficial. 268 enum class NegatibleCost { 269 Cheaper = 0, // Negated expression is cheaper. 270 Neutral = 1, // Negated expression has the same cost. 271 Expensive = 2 // Negated expression is more expensive. 272 }; 273 274 class ArgListEntry { 275 public: 276 Value *Val = nullptr; 277 SDValue Node = SDValue(); 278 Type *Ty = nullptr; 279 bool IsSExt : 1; 280 bool IsZExt : 1; 281 bool IsInReg : 1; 282 bool IsSRet : 1; 283 bool IsNest : 1; 284 bool IsByVal : 1; 285 bool IsByRef : 1; 286 bool IsInAlloca : 1; 287 bool IsPreallocated : 1; 288 bool IsReturned : 1; 289 bool IsSwiftSelf : 1; 290 bool IsSwiftAsync : 1; 291 bool IsSwiftError : 1; 292 bool IsCFGuardTarget : 1; 293 MaybeAlign Alignment = None; 294 Type *IndirectType = nullptr; 295 ArgListEntry()296 ArgListEntry() 297 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), 298 IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false), 299 IsPreallocated(false), IsReturned(false), IsSwiftSelf(false), 300 IsSwiftAsync(false), IsSwiftError(false), IsCFGuardTarget(false) {} 301 302 void setAttributes(const CallBase *Call, unsigned ArgIdx); 303 }; 304 using ArgListTy = std::vector<ArgListEntry>; 305 markLibCallAttributes(MachineFunction * MF,unsigned CC,ArgListTy & Args)306 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, 307 ArgListTy &Args) const {}; 308 getExtendForContent(BooleanContent Content)309 static ISD::NodeType getExtendForContent(BooleanContent Content) { 310 switch (Content) { 311 case UndefinedBooleanContent: 312 // Extend by adding rubbish bits. 313 return ISD::ANY_EXTEND; 314 case ZeroOrOneBooleanContent: 315 // Extend by adding zero bits. 316 return ISD::ZERO_EXTEND; 317 case ZeroOrNegativeOneBooleanContent: 318 // Extend by copying the sign bit. 319 return ISD::SIGN_EXTEND; 320 } 321 llvm_unreachable("Invalid content kind"); 322 } 323 324 explicit TargetLoweringBase(const TargetMachine &TM); 325 TargetLoweringBase(const TargetLoweringBase &) = delete; 326 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; 327 virtual ~TargetLoweringBase() = default; 328 329 /// Return true if the target support strict float operation isStrictFPEnabled()330 bool isStrictFPEnabled() const { 331 return IsStrictFPEnabled; 332 } 333 334 protected: 335 /// Initialize all of the actions to default values. 336 void initActions(); 337 338 public: getTargetMachine()339 const TargetMachine &getTargetMachine() const { return TM; } 340 useSoftFloat()341 virtual bool useSoftFloat() const { return false; } 342 343 /// Return the pointer type for the given address space, defaults to 344 /// the pointer type from the data layout. 345 /// FIXME: The default needs to be removed once all the code is updated. 346 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { 347 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); 348 } 349 350 /// Return the in-memory pointer type for the given address space, defaults to 351 /// the pointer type from the data layout. FIXME: The default needs to be 352 /// removed once all the code is updated. 353 virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { 354 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); 355 } 356 357 /// Return the type for frame index, which is determined by 358 /// the alloca address space specified through the data layout. getFrameIndexTy(const DataLayout & DL)359 MVT getFrameIndexTy(const DataLayout &DL) const { 360 return getPointerTy(DL, DL.getAllocaAddrSpace()); 361 } 362 363 /// Return the type for code pointers, which is determined by the program 364 /// address space specified through the data layout. getProgramPointerTy(const DataLayout & DL)365 MVT getProgramPointerTy(const DataLayout &DL) const { 366 return getPointerTy(DL, DL.getProgramAddressSpace()); 367 } 368 369 /// Return the type for operands of fence. 370 /// TODO: Let fence operands be of i32 type and remove this. getFenceOperandTy(const DataLayout & DL)371 virtual MVT getFenceOperandTy(const DataLayout &DL) const { 372 return getPointerTy(DL); 373 } 374 375 /// EVT is not used in-tree, but is used by out-of-tree target. 376 /// A documentation for this function would be nice... 377 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; 378 379 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, 380 bool LegalTypes = true) const; 381 382 /// Return the preferred type to use for a shift opcode, given the shifted 383 /// amount type is \p ShiftValueTy. 384 LLVM_READONLY getPreferredShiftAmountTy(LLT ShiftValueTy)385 virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const { 386 return ShiftValueTy; 387 } 388 389 /// Returns the type to be used for the index operand of: 390 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, 391 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR getVectorIdxTy(const DataLayout & DL)392 virtual MVT getVectorIdxTy(const DataLayout &DL) const { 393 return getPointerTy(DL); 394 } 395 396 /// Returns the type to be used for the EVL/AVL operand of VP nodes: 397 /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type, 398 /// and must be at least as large as i32. The EVL is implicitly zero-extended 399 /// to any larger type. getVPExplicitVectorLengthTy()400 virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; } 401 402 /// This callback is used to inspect load/store instructions and add 403 /// target-specific MachineMemOperand flags to them. The default 404 /// implementation does nothing. getTargetMMOFlags(const Instruction & I)405 virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const { 406 return MachineMemOperand::MONone; 407 } 408 409 MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI, 410 const DataLayout &DL) const; 411 MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, 412 const DataLayout &DL) const; 413 MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI, 414 const DataLayout &DL) const; 415 isSelectSupported(SelectSupportKind)416 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { 417 return true; 418 } 419 420 /// Return true if it is profitable to convert a select of FP constants into 421 /// a constant pool load whose address depends on the select condition. The 422 /// parameter may be used to differentiate a select with FP compare from 423 /// integer compare. reduceSelectOfFPConstantLoads(EVT CmpOpVT)424 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { 425 return true; 426 } 427 428 /// Return true if multiple condition registers are available. hasMultipleConditionRegisters()429 bool hasMultipleConditionRegisters() const { 430 return HasMultipleConditionRegisters; 431 } 432 433 /// Return true if the target has BitExtract instructions. hasExtractBitsInsn()434 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } 435 436 /// Return the preferred vector type legalization action. 437 virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)438 getPreferredVectorAction(MVT VT) const { 439 // The default action for one element vectors is to scalarize 440 if (VT.getVectorElementCount().isScalar()) 441 return TypeScalarizeVector; 442 // The default action for an odd-width vector is to widen. 443 if (!VT.isPow2VectorType()) 444 return TypeWidenVector; 445 // The default action for other vectors is to promote 446 return TypePromoteInteger; 447 } 448 449 // Return true if the half type should be passed around as i16, but promoted 450 // to float around arithmetic. The default behavior is to pass around as 451 // float and convert around loads/stores/bitcasts and other places where 452 // the size matters. softPromoteHalfType()453 virtual bool softPromoteHalfType() const { return false; } 454 455 // There are two general methods for expanding a BUILD_VECTOR node: 456 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle 457 // them together. 458 // 2. Build the vector on the stack and then load it. 459 // If this function returns true, then method (1) will be used, subject to 460 // the constraint that all of the necessary shuffles are legal (as determined 461 // by isShuffleMaskLegal). If this function returns false, then method (2) is 462 // always used. The vector type, and the number of defined values, are 463 // provided. 464 virtual bool shouldExpandBuildVectorWithShuffles(EVT,unsigned DefinedValues)465 shouldExpandBuildVectorWithShuffles(EVT /* VT */, 466 unsigned DefinedValues) const { 467 return DefinedValues < 3; 468 } 469 470 /// Return true if integer divide is usually cheaper than a sequence of 471 /// several shifts, adds, and multiplies for this target. 472 /// The definition of "cheaper" may depend on whether we're optimizing 473 /// for speed or for size. isIntDivCheap(EVT VT,AttributeList Attr)474 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } 475 476 /// Return true if the target can handle a standalone remainder operation. hasStandaloneRem(EVT VT)477 virtual bool hasStandaloneRem(EVT VT) const { 478 return true; 479 } 480 481 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). isFsqrtCheap(SDValue X,SelectionDAG & DAG)482 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { 483 // Default behavior is to replace SQRT(X) with X*RSQRT(X). 484 return false; 485 } 486 487 /// Reciprocal estimate status values used by the functions below. 488 enum ReciprocalEstimate : int { 489 Unspecified = -1, 490 Disabled = 0, 491 Enabled = 1 492 }; 493 494 /// Return a ReciprocalEstimate enum value for a square root of the given type 495 /// based on the function's attributes. If the operation is not overridden by 496 /// the function's attributes, "Unspecified" is returned and target defaults 497 /// are expected to be used for instruction selection. 498 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; 499 500 /// Return a ReciprocalEstimate enum value for a division of the given type 501 /// based on the function's attributes. If the operation is not overridden by 502 /// the function's attributes, "Unspecified" is returned and target defaults 503 /// are expected to be used for instruction selection. 504 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; 505 506 /// Return the refinement step count for a square root of the given type based 507 /// on the function's attributes. If the operation is not overridden by 508 /// the function's attributes, "Unspecified" is returned and target defaults 509 /// are expected to be used for instruction selection. 510 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; 511 512 /// Return the refinement step count for a division of the given type based 513 /// on the function's attributes. If the operation is not overridden by 514 /// the function's attributes, "Unspecified" is returned and target defaults 515 /// are expected to be used for instruction selection. 516 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; 517 518 /// Returns true if target has indicated at least one type should be bypassed. isSlowDivBypassed()519 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } 520 521 /// Returns map of slow types for division or remainder with corresponding 522 /// fast types getBypassSlowDivWidths()523 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { 524 return BypassSlowDivWidths; 525 } 526 527 /// Return true if Flow Control is an expensive operation that should be 528 /// avoided. isJumpExpensive()529 bool isJumpExpensive() const { return JumpIsExpensive; } 530 531 /// Return true if selects are only cheaper than branches if the branch is 532 /// unlikely to be predicted right. isPredictableSelectExpensive()533 bool isPredictableSelectExpensive() const { 534 return PredictableSelectIsExpensive; 535 } 536 fallBackToDAGISel(const Instruction & Inst)537 virtual bool fallBackToDAGISel(const Instruction &Inst) const { 538 return false; 539 } 540 541 /// Return true if the following transform is beneficial: 542 /// fold (conv (load x)) -> (load (conv*)x) 543 /// On architectures that don't natively support some vector loads 544 /// efficiently, casting the load to a smaller vector of larger types and 545 /// loading is more efficient, however, this can be undone by optimizations in 546 /// dag combiner. isLoadBitCastBeneficial(EVT LoadVT,EVT BitcastVT,const SelectionDAG & DAG,const MachineMemOperand & MMO)547 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, 548 const SelectionDAG &DAG, 549 const MachineMemOperand &MMO) const { 550 // Don't do if we could do an indexed load on the original type, but not on 551 // the new one. 552 if (!LoadVT.isSimple() || !BitcastVT.isSimple()) 553 return true; 554 555 MVT LoadMVT = LoadVT.getSimpleVT(); 556 557 // Don't bother doing this if it's just going to be promoted again later, as 558 // doing so might interfere with other combines. 559 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && 560 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) 561 return false; 562 563 bool Fast = false; 564 return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT, 565 MMO, &Fast) && Fast; 566 } 567 568 /// Return true if the following transform is beneficial: 569 /// (store (y (conv x)), y*)) -> (store x, (x*)) isStoreBitCastBeneficial(EVT StoreVT,EVT BitcastVT,const SelectionDAG & DAG,const MachineMemOperand & MMO)570 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, 571 const SelectionDAG &DAG, 572 const MachineMemOperand &MMO) const { 573 // Default to the same logic as loads. 574 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO); 575 } 576 577 /// Return true if it is expected to be cheaper to do a store of a non-zero 578 /// vector constant with the given size and type for the address space than to 579 /// store the individual scalar element constants. storeOfVectorConstantIsCheap(EVT MemVT,unsigned NumElem,unsigned AddrSpace)580 virtual bool storeOfVectorConstantIsCheap(EVT MemVT, 581 unsigned NumElem, 582 unsigned AddrSpace) const { 583 return false; 584 } 585 586 /// Allow store merging for the specified type after legalization in addition 587 /// to before legalization. This may transform stores that do not exist 588 /// earlier (for example, stores created from intrinsics). mergeStoresAfterLegalization(EVT MemVT)589 virtual bool mergeStoresAfterLegalization(EVT MemVT) const { 590 return true; 591 } 592 593 /// Returns if it's reasonable to merge stores to MemVT size. canMergeStoresTo(unsigned AS,EVT MemVT,const MachineFunction & MF)594 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, 595 const MachineFunction &MF) const { 596 return true; 597 } 598 599 /// Return true if it is cheap to speculate a call to intrinsic cttz. isCheapToSpeculateCttz()600 virtual bool isCheapToSpeculateCttz() const { 601 return false; 602 } 603 604 /// Return true if it is cheap to speculate a call to intrinsic ctlz. isCheapToSpeculateCtlz()605 virtual bool isCheapToSpeculateCtlz() const { 606 return false; 607 } 608 609 /// Return true if ctlz instruction is fast. isCtlzFast()610 virtual bool isCtlzFast() const { 611 return false; 612 } 613 614 /// Return the maximum number of "x & (x - 1)" operations that can be done 615 /// instead of deferring to a custom CTPOP. getCustomCtpopCost(EVT VT,ISD::CondCode Cond)616 virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { 617 return 1; 618 } 619 620 /// Return true if instruction generated for equality comparison is folded 621 /// with instruction generated for signed comparison. isEqualityCmpFoldedWithSignedCmp()622 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } 623 624 /// Return true if the heuristic to prefer icmp eq zero should be used in code 625 /// gen prepare. preferZeroCompareBranch()626 virtual bool preferZeroCompareBranch() const { return false; } 627 628 /// Return true if it is safe to transform an integer-domain bitwise operation 629 /// into the equivalent floating-point operation. This should be set to true 630 /// if the target has IEEE-754-compliant fabs/fneg operations for the input 631 /// type. hasBitPreservingFPLogic(EVT VT)632 virtual bool hasBitPreservingFPLogic(EVT VT) const { 633 return false; 634 } 635 636 /// Return true if it is cheaper to split the store of a merged int val 637 /// from a pair of smaller values into multiple stores. isMultiStoresCheaperThanBitsMerge(EVT LTy,EVT HTy)638 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { 639 return false; 640 } 641 642 /// Return if the target supports combining a 643 /// chain like: 644 /// \code 645 /// %andResult = and %val1, #mask 646 /// %icmpResult = icmp %andResult, 0 647 /// \endcode 648 /// into a single machine instruction of a form like: 649 /// \code 650 /// cc = test %register, #mask 651 /// \endcode isMaskAndCmp0FoldingBeneficial(const Instruction & AndI)652 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { 653 return false; 654 } 655 656 /// Use bitwise logic to make pairs of compares more efficient. For example: 657 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 658 /// This should be true when it takes more than one instruction to lower 659 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on 660 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. convertSetCCLogicToBitwiseLogic(EVT VT)661 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { 662 return false; 663 } 664 665 /// Return the preferred operand type if the target has a quick way to compare 666 /// integer values of the given size. Assume that any legal integer type can 667 /// be compared efficiently. Targets may override this to allow illegal wide 668 /// types to return a vector type if there is support to compare that type. hasFastEqualityCompare(unsigned NumBits)669 virtual MVT hasFastEqualityCompare(unsigned NumBits) const { 670 MVT VT = MVT::getIntegerVT(NumBits); 671 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; 672 } 673 674 /// Return true if the target should transform: 675 /// (X & Y) == Y ---> (~X & Y) == 0 676 /// (X & Y) != Y ---> (~X & Y) != 0 677 /// 678 /// This may be profitable if the target has a bitwise and-not operation that 679 /// sets comparison flags. A target may want to limit the transformation based 680 /// on the type of Y or if Y is a constant. 681 /// 682 /// Note that the transform will not occur if Y is known to be a power-of-2 683 /// because a mask and compare of a single bit can be handled by inverting the 684 /// predicate, for example: 685 /// (X & 8) == 8 ---> (X & 8) != 0 hasAndNotCompare(SDValue Y)686 virtual bool hasAndNotCompare(SDValue Y) const { 687 return false; 688 } 689 690 /// Return true if the target has a bitwise and-not operation: 691 /// X = ~A & B 692 /// This can be used to simplify select or other instructions. hasAndNot(SDValue X)693 virtual bool hasAndNot(SDValue X) const { 694 // If the target has the more complex version of this operation, assume that 695 // it has this operation too. 696 return hasAndNotCompare(X); 697 } 698 699 /// Return true if the target has a bit-test instruction: 700 /// (X & (1 << Y)) ==/!= 0 701 /// This knowledge can be used to prevent breaking the pattern, 702 /// or creating it if it could be recognized. hasBitTest(SDValue X,SDValue Y)703 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } 704 705 /// There are two ways to clear extreme bits (either low or high): 706 /// Mask: x & (-1 << y) (the instcombine canonical form) 707 /// Shifts: x >> y << y 708 /// Return true if the variant with 2 variable shifts is preferred. 709 /// Return false if there is no preference. shouldFoldMaskToVariableShiftPair(SDValue X)710 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const { 711 // By default, let's assume that no one prefers shifts. 712 return false; 713 } 714 715 /// Return true if it is profitable to fold a pair of shifts into a mask. 716 /// This is usually true on most targets. But some targets, like Thumb1, 717 /// have immediate shift instructions, but no immediate "and" instruction; 718 /// this makes the fold unprofitable. shouldFoldConstantShiftPairToMask(const SDNode * N,CombineLevel Level)719 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, 720 CombineLevel Level) const { 721 return true; 722 } 723 724 /// Should we tranform the IR-optimal check for whether given truncation 725 /// down into KeptBits would be truncating or not: 726 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) 727 /// Into it's more traditional form: 728 /// ((%x << C) a>> C) dstcond %x 729 /// Return true if we should transform. 730 /// Return false if there is no preference. shouldTransformSignedTruncationCheck(EVT XVT,unsigned KeptBits)731 virtual bool shouldTransformSignedTruncationCheck(EVT XVT, 732 unsigned KeptBits) const { 733 // By default, let's assume that no one prefers shifts. 734 return false; 735 } 736 737 /// Given the pattern 738 /// (X & (C l>>/<< Y)) ==/!= 0 739 /// return true if it should be transformed into: 740 /// ((X <</l>> Y) & C) ==/!= 0 741 /// WARNING: if 'X' is a constant, the fold may deadlock! 742 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() 743 /// here because it can end up being not linked in. shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X,ConstantSDNode * XC,ConstantSDNode * CC,SDValue Y,unsigned OldShiftOpcode,unsigned NewShiftOpcode,SelectionDAG & DAG)744 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 745 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 746 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 747 SelectionDAG &DAG) const { 748 if (hasBitTest(X, Y)) { 749 // One interesting pattern that we'd want to form is 'bit test': 750 // ((1 << Y) & C) ==/!= 0 751 // But we also need to be careful not to try to reverse that fold. 752 753 // Is this '1 << Y' ? 754 if (OldShiftOpcode == ISD::SHL && CC->isOne()) 755 return false; // Keep the 'bit test' pattern. 756 757 // Will it be '1 << Y' after the transform ? 758 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) 759 return true; // Do form the 'bit test' pattern. 760 } 761 762 // If 'X' is a constant, and we transform, then we will immediately 763 // try to undo the fold, thus causing endless combine loop. 764 // So by default, let's assume everyone prefers the fold 765 // iff 'X' is not a constant. 766 return !XC; 767 } 768 769 /// These two forms are equivalent: 770 /// sub %y, (xor %x, -1) 771 /// add (add %x, 1), %y 772 /// The variant with two add's is IR-canonical. 773 /// Some targets may prefer one to the other. preferIncOfAddToSubOfNot(EVT VT)774 virtual bool preferIncOfAddToSubOfNot(EVT VT) const { 775 // By default, let's assume that everyone prefers the form with two add's. 776 return true; 777 } 778 779 /// Return true if the target wants to use the optimization that 780 /// turns ext(promotableInst1(...(promotableInstN(load)))) into 781 /// promotedInst1(...(promotedInstN(ext(load)))). enableExtLdPromotion()782 bool enableExtLdPromotion() const { return EnableExtLdPromotion; } 783 784 /// Return true if the target can combine store(extractelement VectorTy, 785 /// Idx). 786 /// \p Cost[out] gives the cost of that transformation when this is true. canCombineStoreAndExtract(Type * VectorTy,Value * Idx,unsigned & Cost)787 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, 788 unsigned &Cost) const { 789 return false; 790 } 791 792 /// Return true if inserting a scalar into a variable element of an undef 793 /// vector is more efficiently handled by splatting the scalar instead. shouldSplatInsEltVarIndex(EVT)794 virtual bool shouldSplatInsEltVarIndex(EVT) const { 795 return false; 796 } 797 798 /// Return true if target always benefits from combining into FMA for a 799 /// given value type. This must typically return false on targets where FMA 800 /// takes more cycles to execute than FADD. enableAggressiveFMAFusion(EVT VT)801 virtual bool enableAggressiveFMAFusion(EVT VT) const { 802 return false; 803 } 804 805 /// Return the ValueType of the result of SETCC operations. 806 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, 807 EVT VT) const; 808 809 /// Return the ValueType for comparison libcalls. Comparions libcalls include 810 /// floating point comparion calls, and Ordered/Unordered check calls on 811 /// floating point numbers. 812 virtual 813 MVT::SimpleValueType getCmpLibcallReturnType() const; 814 815 /// For targets without i1 registers, this gives the nature of the high-bits 816 /// of boolean values held in types wider than i1. 817 /// 818 /// "Boolean values" are special true/false values produced by nodes like 819 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. 820 /// Not to be confused with general values promoted from i1. Some cpus 821 /// distinguish between vectors of boolean and scalars; the isVec parameter 822 /// selects between the two kinds. For example on X86 a scalar boolean should 823 /// be zero extended from i1, while the elements of a vector of booleans 824 /// should be sign extended from i1. 825 /// 826 /// Some cpus also treat floating point types the same way as they treat 827 /// vectors instead of the way they treat scalars. getBooleanContents(bool isVec,bool isFloat)828 BooleanContent getBooleanContents(bool isVec, bool isFloat) const { 829 if (isVec) 830 return BooleanVectorContents; 831 return isFloat ? BooleanFloatContents : BooleanContents; 832 } 833 getBooleanContents(EVT Type)834 BooleanContent getBooleanContents(EVT Type) const { 835 return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); 836 } 837 838 /// Return target scheduling preference. getSchedulingPreference()839 Sched::Preference getSchedulingPreference() const { 840 return SchedPreferenceInfo; 841 } 842 843 /// Some scheduler, e.g. hybrid, can switch to different scheduling heuristics 844 /// for different nodes. This function returns the preference (or none) for 845 /// the given node. getSchedulingPreference(SDNode *)846 virtual Sched::Preference getSchedulingPreference(SDNode *) const { 847 return Sched::None; 848 } 849 850 /// Return the register class that should be used for the specified value 851 /// type. 852 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const { 853 (void)isDivergent; 854 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; 855 assert(RC && "This value type is not natively supported!"); 856 return RC; 857 } 858 859 /// Allows target to decide about the register class of the 860 /// specific value that is live outside the defining block. 861 /// Returns true if the value needs uniform register class. requiresUniformRegister(MachineFunction & MF,const Value *)862 virtual bool requiresUniformRegister(MachineFunction &MF, 863 const Value *) const { 864 return false; 865 } 866 867 /// Return the 'representative' register class for the specified value 868 /// type. 869 /// 870 /// The 'representative' register class is the largest legal super-reg 871 /// register class for the register class of the value type. For example, on 872 /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep 873 /// register class is GR64 on x86_64. getRepRegClassFor(MVT VT)874 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { 875 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; 876 return RC; 877 } 878 879 /// Return the cost of the 'representative' register class for the specified 880 /// value type. getRepRegClassCostFor(MVT VT)881 virtual uint8_t getRepRegClassCostFor(MVT VT) const { 882 return RepRegClassCostForVT[VT.SimpleTy]; 883 } 884 885 /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS 886 /// instructions, and false if a library call is preferred (e.g for code-size 887 /// reasons). shouldExpandShift(SelectionDAG & DAG,SDNode * N)888 virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { 889 return true; 890 } 891 892 /// Return true if the target has native support for the specified value type. 893 /// This means that it has a register that directly holds it without 894 /// promotions or expansions. isTypeLegal(EVT VT)895 bool isTypeLegal(EVT VT) const { 896 assert(!VT.isSimple() || 897 (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); 898 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; 899 } 900 901 class ValueTypeActionImpl { 902 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum 903 /// that indicates how instruction selection should deal with the type. 904 LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE]; 905 906 public: ValueTypeActionImpl()907 ValueTypeActionImpl() { 908 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), 909 TypeLegal); 910 } 911 getTypeAction(MVT VT)912 LegalizeTypeAction getTypeAction(MVT VT) const { 913 return ValueTypeActions[VT.SimpleTy]; 914 } 915 setTypeAction(MVT VT,LegalizeTypeAction Action)916 void setTypeAction(MVT VT, LegalizeTypeAction Action) { 917 ValueTypeActions[VT.SimpleTy] = Action; 918 } 919 }; 920 getValueTypeActions()921 const ValueTypeActionImpl &getValueTypeActions() const { 922 return ValueTypeActions; 923 } 924 925 /// Return how we should legalize values of this type, either it is already 926 /// legal (return 'Legal') or we need to promote it to a larger type (return 927 /// 'Promote'), or we need to expand it into multiple registers of smaller 928 /// integer type (return 'Expand'). 'Custom' is not an option. getTypeAction(LLVMContext & Context,EVT VT)929 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { 930 return getTypeConversion(Context, VT).first; 931 } getTypeAction(MVT VT)932 LegalizeTypeAction getTypeAction(MVT VT) const { 933 return ValueTypeActions.getTypeAction(VT); 934 } 935 936 /// For types supported by the target, this is an identity function. For 937 /// types that must be promoted to larger types, this returns the larger type 938 /// to promote to. For integer types that are larger than the largest integer 939 /// register, this contains one step in the expansion to get to the smaller 940 /// register. For illegal floating point types, this returns the integer type 941 /// to transform to. getTypeToTransformTo(LLVMContext & Context,EVT VT)942 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { 943 return getTypeConversion(Context, VT).second; 944 } 945 946 /// For types supported by the target, this is an identity function. For 947 /// types that must be expanded (i.e. integer types that are larger than the 948 /// largest integer register or illegal floating point types), this returns 949 /// the largest legal type it will be expanded to. getTypeToExpandTo(LLVMContext & Context,EVT VT)950 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { 951 assert(!VT.isVector()); 952 while (true) { 953 switch (getTypeAction(Context, VT)) { 954 case TypeLegal: 955 return VT; 956 case TypeExpandInteger: 957 VT = getTypeToTransformTo(Context, VT); 958 break; 959 default: 960 llvm_unreachable("Type is not legal nor is it to be expanded!"); 961 } 962 } 963 } 964 965 /// Vector types are broken down into some number of legal first class types. 966 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 967 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 968 /// turns into 4 EVT::i32 values with both PPC and X86. 969 /// 970 /// This method returns the number of registers needed, and the VT for each 971 /// register. It also returns the VT and quantity of the intermediate values 972 /// before they are promoted/expanded. 973 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, 974 EVT &IntermediateVT, 975 unsigned &NumIntermediates, 976 MVT &RegisterVT) const; 977 978 /// Certain targets such as MIPS require that some types such as vectors are 979 /// always broken down into scalars in some contexts. This occurs even if the 980 /// vector type is legal. getVectorTypeBreakdownForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT,EVT & IntermediateVT,unsigned & NumIntermediates,MVT & RegisterVT)981 virtual unsigned getVectorTypeBreakdownForCallingConv( 982 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 983 unsigned &NumIntermediates, MVT &RegisterVT) const { 984 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, 985 RegisterVT); 986 } 987 988 struct IntrinsicInfo { 989 unsigned opc = 0; // target opcode 990 EVT memVT; // memory VT 991 992 // value representing memory location 993 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; 994 995 int offset = 0; // offset off of ptrVal 996 uint64_t size = 0; // the size of the memory location 997 // (taken from memVT if zero) 998 MaybeAlign align = Align(1); // alignment 999 1000 MachineMemOperand::Flags flags = MachineMemOperand::MONone; 1001 IntrinsicInfo() = default; 1002 }; 1003 1004 /// Given an intrinsic, checks if on the target the intrinsic will need to map 1005 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns 1006 /// true and store the intrinsic information into the IntrinsicInfo that was 1007 /// passed to the function. getTgtMemIntrinsic(IntrinsicInfo &,const CallInst &,MachineFunction &,unsigned)1008 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, 1009 MachineFunction &, 1010 unsigned /*Intrinsic*/) const { 1011 return false; 1012 } 1013 1014 /// Returns true if the target can instruction select the specified FP 1015 /// immediate natively. If false, the legalizer will materialize the FP 1016 /// immediate as a load from a constant pool. 1017 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/, 1018 bool ForCodeSize = false) const { 1019 return false; 1020 } 1021 1022 /// Targets can use this to indicate that they only support *some* 1023 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a 1024 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be 1025 /// legal. isShuffleMaskLegal(ArrayRef<int>,EVT)1026 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { 1027 return true; 1028 } 1029 1030 /// Returns true if the operation can trap for the value type. 1031 /// 1032 /// VT must be a legal type. By default, we optimistically assume most 1033 /// operations don't trap except for integer divide and remainder. 1034 virtual bool canOpTrap(unsigned Op, EVT VT) const; 1035 1036 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there 1037 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a 1038 /// constant pool entry. isVectorClearMaskLegal(ArrayRef<int>,EVT)1039 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, 1040 EVT /*VT*/) const { 1041 return false; 1042 } 1043 1044 /// Return how this operation should be treated: either it is legal, needs to 1045 /// be promoted to a larger size, needs to be expanded to some other code 1046 /// sequence, or the target has a custom expander for it. getOperationAction(unsigned Op,EVT VT)1047 LegalizeAction getOperationAction(unsigned Op, EVT VT) const { 1048 if (VT.isExtended()) return Expand; 1049 // If a target-specific SDNode requires legalization, require the target 1050 // to provide custom legalization for it. 1051 if (Op >= array_lengthof(OpActions[0])) return Custom; 1052 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; 1053 } 1054 1055 /// Custom method defined by each target to indicate if an operation which 1056 /// may require a scale is supported natively by the target. 1057 /// If not, the operation is illegal. isSupportedFixedPointOperation(unsigned Op,EVT VT,unsigned Scale)1058 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, 1059 unsigned Scale) const { 1060 return false; 1061 } 1062 1063 /// Some fixed point operations may be natively supported by the target but 1064 /// only for specific scales. This method allows for checking 1065 /// if the width is supported by the target for a given operation that may 1066 /// depend on scale. getFixedPointOperationAction(unsigned Op,EVT VT,unsigned Scale)1067 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, 1068 unsigned Scale) const { 1069 auto Action = getOperationAction(Op, VT); 1070 if (Action != Legal) 1071 return Action; 1072 1073 // This operation is supported in this type but may only work on specific 1074 // scales. 1075 bool Supported; 1076 switch (Op) { 1077 default: 1078 llvm_unreachable("Unexpected fixed point operation."); 1079 case ISD::SMULFIX: 1080 case ISD::SMULFIXSAT: 1081 case ISD::UMULFIX: 1082 case ISD::UMULFIXSAT: 1083 case ISD::SDIVFIX: 1084 case ISD::SDIVFIXSAT: 1085 case ISD::UDIVFIX: 1086 case ISD::UDIVFIXSAT: 1087 Supported = isSupportedFixedPointOperation(Op, VT, Scale); 1088 break; 1089 } 1090 1091 return Supported ? Action : Expand; 1092 } 1093 1094 // If Op is a strict floating-point operation, return the result 1095 // of getOperationAction for the equivalent non-strict operation. getStrictFPOperationAction(unsigned Op,EVT VT)1096 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { 1097 unsigned EqOpc; 1098 switch (Op) { 1099 default: llvm_unreachable("Unexpected FP pseudo-opcode"); 1100 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 1101 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; 1102 #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 1103 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; 1104 #include "llvm/IR/ConstrainedOps.def" 1105 } 1106 1107 return getOperationAction(EqOpc, VT); 1108 } 1109 1110 /// Return true if the specified operation is legal on this target or can be 1111 /// made legal with custom lowering. This is used to help guide high-level 1112 /// lowering decisions. LegalOnly is an optional convenience for code paths 1113 /// traversed pre and post legalisation. 1114 bool isOperationLegalOrCustom(unsigned Op, EVT VT, 1115 bool LegalOnly = false) const { 1116 if (LegalOnly) 1117 return isOperationLegal(Op, VT); 1118 1119 return (VT == MVT::Other || isTypeLegal(VT)) && 1120 (getOperationAction(Op, VT) == Legal || 1121 getOperationAction(Op, VT) == Custom); 1122 } 1123 1124 /// Return true if the specified operation is legal on this target or can be 1125 /// made legal using promotion. This is used to help guide high-level lowering 1126 /// decisions. LegalOnly is an optional convenience for code paths traversed 1127 /// pre and post legalisation. 1128 bool isOperationLegalOrPromote(unsigned Op, EVT VT, 1129 bool LegalOnly = false) const { 1130 if (LegalOnly) 1131 return isOperationLegal(Op, VT); 1132 1133 return (VT == MVT::Other || isTypeLegal(VT)) && 1134 (getOperationAction(Op, VT) == Legal || 1135 getOperationAction(Op, VT) == Promote); 1136 } 1137 1138 /// Return true if the specified operation is legal on this target or can be 1139 /// made legal with custom lowering or using promotion. This is used to help 1140 /// guide high-level lowering decisions. LegalOnly is an optional convenience 1141 /// for code paths traversed pre and post legalisation. 1142 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, 1143 bool LegalOnly = false) const { 1144 if (LegalOnly) 1145 return isOperationLegal(Op, VT); 1146 1147 return (VT == MVT::Other || isTypeLegal(VT)) && 1148 (getOperationAction(Op, VT) == Legal || 1149 getOperationAction(Op, VT) == Custom || 1150 getOperationAction(Op, VT) == Promote); 1151 } 1152 1153 /// Return true if the operation uses custom lowering, regardless of whether 1154 /// the type is legal or not. isOperationCustom(unsigned Op,EVT VT)1155 bool isOperationCustom(unsigned Op, EVT VT) const { 1156 return getOperationAction(Op, VT) == Custom; 1157 } 1158 1159 /// Return true if lowering to a jump table is allowed. areJTsAllowed(const Function * Fn)1160 virtual bool areJTsAllowed(const Function *Fn) const { 1161 if (Fn->getFnAttribute("no-jump-tables").getValueAsBool()) 1162 return false; 1163 1164 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || 1165 isOperationLegalOrCustom(ISD::BRIND, MVT::Other); 1166 } 1167 1168 /// Check whether the range [Low,High] fits in a machine word. rangeFitsInWord(const APInt & Low,const APInt & High,const DataLayout & DL)1169 bool rangeFitsInWord(const APInt &Low, const APInt &High, 1170 const DataLayout &DL) const { 1171 // FIXME: Using the pointer type doesn't seem ideal. 1172 uint64_t BW = DL.getIndexSizeInBits(0u); 1173 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; 1174 return Range <= BW; 1175 } 1176 1177 /// Return true if lowering to a jump table is suitable for a set of case 1178 /// clusters which may contain \p NumCases cases, \p Range range of values. 1179 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, 1180 uint64_t Range, ProfileSummaryInfo *PSI, 1181 BlockFrequencyInfo *BFI) const; 1182 1183 /// Return true if lowering to a bit test is suitable for a set of case 1184 /// clusters which contains \p NumDests unique destinations, \p Low and 1185 /// \p High as its lowest and highest case values, and expects \p NumCmps 1186 /// case value comparisons. Check if the number of destinations, comparison 1187 /// metric, and range are all suitable. isSuitableForBitTests(unsigned NumDests,unsigned NumCmps,const APInt & Low,const APInt & High,const DataLayout & DL)1188 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, 1189 const APInt &Low, const APInt &High, 1190 const DataLayout &DL) const { 1191 // FIXME: I don't think NumCmps is the correct metric: a single case and a 1192 // range of cases both require only one branch to lower. Just looking at the 1193 // number of clusters and destinations should be enough to decide whether to 1194 // build bit tests. 1195 1196 // To lower a range with bit tests, the range must fit the bitwidth of a 1197 // machine word. 1198 if (!rangeFitsInWord(Low, High, DL)) 1199 return false; 1200 1201 // Decide whether it's profitable to lower this range with bit tests. Each 1202 // destination requires a bit test and branch, and there is an overall range 1203 // check branch. For a small number of clusters, separate comparisons might 1204 // be cheaper, and for many destinations, splitting the range might be 1205 // better. 1206 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || 1207 (NumDests == 3 && NumCmps >= 6); 1208 } 1209 1210 /// Return true if the specified operation is illegal on this target or 1211 /// unlikely to be made legal with custom lowering. This is used to help guide 1212 /// high-level lowering decisions. isOperationExpand(unsigned Op,EVT VT)1213 bool isOperationExpand(unsigned Op, EVT VT) const { 1214 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); 1215 } 1216 1217 /// Return true if the specified operation is legal on this target. isOperationLegal(unsigned Op,EVT VT)1218 bool isOperationLegal(unsigned Op, EVT VT) const { 1219 return (VT == MVT::Other || isTypeLegal(VT)) && 1220 getOperationAction(Op, VT) == Legal; 1221 } 1222 1223 /// Return how this load with extension should be treated: either it is legal, 1224 /// needs to be promoted to a larger size, needs to be expanded to some other 1225 /// code sequence, or the target has a custom expander for it. getLoadExtAction(unsigned ExtType,EVT ValVT,EVT MemVT)1226 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, 1227 EVT MemVT) const { 1228 if (ValVT.isExtended() || MemVT.isExtended()) return Expand; 1229 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; 1230 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; 1231 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE && 1232 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!"); 1233 unsigned Shift = 4 * ExtType; 1234 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); 1235 } 1236 1237 /// Return true if the specified load with extension is legal on this target. isLoadExtLegal(unsigned ExtType,EVT ValVT,EVT MemVT)1238 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { 1239 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; 1240 } 1241 1242 /// Return true if the specified load with extension is legal or custom 1243 /// on this target. isLoadExtLegalOrCustom(unsigned ExtType,EVT ValVT,EVT MemVT)1244 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { 1245 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || 1246 getLoadExtAction(ExtType, ValVT, MemVT) == Custom; 1247 } 1248 1249 /// Return how this store with truncation should be treated: either it is 1250 /// legal, needs to be promoted to a larger size, needs to be expanded to some 1251 /// other code sequence, or the target has a custom expander for it. getTruncStoreAction(EVT ValVT,EVT MemVT)1252 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { 1253 if (ValVT.isExtended() || MemVT.isExtended()) return Expand; 1254 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; 1255 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; 1256 assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE && 1257 "Table isn't big enough!"); 1258 return TruncStoreActions[ValI][MemI]; 1259 } 1260 1261 /// Return true if the specified store with truncation is legal on this 1262 /// target. isTruncStoreLegal(EVT ValVT,EVT MemVT)1263 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { 1264 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; 1265 } 1266 1267 /// Return true if the specified store with truncation has solution on this 1268 /// target. isTruncStoreLegalOrCustom(EVT ValVT,EVT MemVT)1269 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { 1270 return isTypeLegal(ValVT) && 1271 (getTruncStoreAction(ValVT, MemVT) == Legal || 1272 getTruncStoreAction(ValVT, MemVT) == Custom); 1273 } 1274 canCombineTruncStore(EVT ValVT,EVT MemVT,bool LegalOnly)1275 virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, 1276 bool LegalOnly) const { 1277 if (LegalOnly) 1278 return isTruncStoreLegal(ValVT, MemVT); 1279 1280 return isTruncStoreLegalOrCustom(ValVT, MemVT); 1281 } 1282 1283 /// Return how the indexed load should be treated: either it is legal, needs 1284 /// to be promoted to a larger size, needs to be expanded to some other code 1285 /// sequence, or the target has a custom expander for it. getIndexedLoadAction(unsigned IdxMode,MVT VT)1286 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { 1287 return getIndexedModeAction(IdxMode, VT, IMAB_Load); 1288 } 1289 1290 /// Return true if the specified indexed load is legal on this target. isIndexedLoadLegal(unsigned IdxMode,EVT VT)1291 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { 1292 return VT.isSimple() && 1293 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || 1294 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); 1295 } 1296 1297 /// Return how the indexed store should be treated: either it is legal, needs 1298 /// to be promoted to a larger size, needs to be expanded to some other code 1299 /// sequence, or the target has a custom expander for it. getIndexedStoreAction(unsigned IdxMode,MVT VT)1300 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { 1301 return getIndexedModeAction(IdxMode, VT, IMAB_Store); 1302 } 1303 1304 /// Return true if the specified indexed load is legal on this target. isIndexedStoreLegal(unsigned IdxMode,EVT VT)1305 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { 1306 return VT.isSimple() && 1307 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || 1308 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); 1309 } 1310 1311 /// Return how the indexed load should be treated: either it is legal, needs 1312 /// to be promoted to a larger size, needs to be expanded to some other code 1313 /// sequence, or the target has a custom expander for it. getIndexedMaskedLoadAction(unsigned IdxMode,MVT VT)1314 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const { 1315 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad); 1316 } 1317 1318 /// Return true if the specified indexed load is legal on this target. isIndexedMaskedLoadLegal(unsigned IdxMode,EVT VT)1319 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const { 1320 return VT.isSimple() && 1321 (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || 1322 getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); 1323 } 1324 1325 /// Return how the indexed store should be treated: either it is legal, needs 1326 /// to be promoted to a larger size, needs to be expanded to some other code 1327 /// sequence, or the target has a custom expander for it. getIndexedMaskedStoreAction(unsigned IdxMode,MVT VT)1328 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const { 1329 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore); 1330 } 1331 1332 /// Return true if the specified indexed load is legal on this target. isIndexedMaskedStoreLegal(unsigned IdxMode,EVT VT)1333 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const { 1334 return VT.isSimple() && 1335 (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || 1336 getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); 1337 } 1338 1339 /// Returns true if the index type for a masked gather/scatter requires 1340 /// extending shouldExtendGSIndex(EVT VT,EVT & EltTy)1341 virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; } 1342 1343 // Returns true if VT is a legal index type for masked gathers/scatters 1344 // on this target shouldRemoveExtendFromGSIndex(EVT VT)1345 virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; } 1346 1347 /// Return how the condition code should be treated: either it is legal, needs 1348 /// to be expanded to some other code sequence, or the target has a custom 1349 /// expander for it. 1350 LegalizeAction getCondCodeAction(ISD::CondCode CC,MVT VT)1351 getCondCodeAction(ISD::CondCode CC, MVT VT) const { 1352 assert((unsigned)CC < array_lengthof(CondCodeActions) && 1353 ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && 1354 "Table isn't big enough!"); 1355 // See setCondCodeAction for how this is encoded. 1356 uint32_t Shift = 4 * (VT.SimpleTy & 0x7); 1357 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; 1358 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); 1359 assert(Action != Promote && "Can't promote condition code!"); 1360 return Action; 1361 } 1362 1363 /// Return true if the specified condition code is legal on this target. isCondCodeLegal(ISD::CondCode CC,MVT VT)1364 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { 1365 return getCondCodeAction(CC, VT) == Legal; 1366 } 1367 1368 /// Return true if the specified condition code is legal or custom on this 1369 /// target. isCondCodeLegalOrCustom(ISD::CondCode CC,MVT VT)1370 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { 1371 return getCondCodeAction(CC, VT) == Legal || 1372 getCondCodeAction(CC, VT) == Custom; 1373 } 1374 1375 /// If the action for this operation is to promote, this method returns the 1376 /// ValueType to promote to. getTypeToPromoteTo(unsigned Op,MVT VT)1377 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { 1378 assert(getOperationAction(Op, VT) == Promote && 1379 "This operation isn't promoted!"); 1380 1381 // See if this has an explicit type specified. 1382 std::map<std::pair<unsigned, MVT::SimpleValueType>, 1383 MVT::SimpleValueType>::const_iterator PTTI = 1384 PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); 1385 if (PTTI != PromoteToType.end()) return PTTI->second; 1386 1387 assert((VT.isInteger() || VT.isFloatingPoint()) && 1388 "Cannot autopromote this type, add it with AddPromotedToType."); 1389 1390 MVT NVT = VT; 1391 do { 1392 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); 1393 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && 1394 "Didn't find type to promote to!"); 1395 } while (!isTypeLegal(NVT) || 1396 getOperationAction(Op, NVT) == Promote); 1397 return NVT; 1398 } 1399 1400 virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, 1401 bool AllowUnknown = false) const { 1402 return getValueType(DL, Ty, AllowUnknown); 1403 } 1404 1405 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM 1406 /// operations except for the pointer size. If AllowUnknown is true, this 1407 /// will return MVT::Other for types with no EVT counterpart (e.g. structs), 1408 /// otherwise it will assert. 1409 EVT getValueType(const DataLayout &DL, Type *Ty, 1410 bool AllowUnknown = false) const { 1411 // Lower scalar pointers to native pointer types. 1412 if (auto *PTy = dyn_cast<PointerType>(Ty)) 1413 return getPointerTy(DL, PTy->getAddressSpace()); 1414 1415 if (auto *VTy = dyn_cast<VectorType>(Ty)) { 1416 Type *EltTy = VTy->getElementType(); 1417 // Lower vectors of pointers to native pointer types. 1418 if (auto *PTy = dyn_cast<PointerType>(EltTy)) { 1419 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace())); 1420 EltTy = PointerTy.getTypeForEVT(Ty->getContext()); 1421 } 1422 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), 1423 VTy->getElementCount()); 1424 } 1425 1426 return EVT::getEVT(Ty, AllowUnknown); 1427 } 1428 1429 EVT getMemValueType(const DataLayout &DL, Type *Ty, 1430 bool AllowUnknown = false) const { 1431 // Lower scalar pointers to native pointer types. 1432 if (PointerType *PTy = dyn_cast<PointerType>(Ty)) 1433 return getPointerMemTy(DL, PTy->getAddressSpace()); 1434 else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { 1435 Type *Elm = VTy->getElementType(); 1436 if (PointerType *PT = dyn_cast<PointerType>(Elm)) { 1437 EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace())); 1438 Elm = PointerTy.getTypeForEVT(Ty->getContext()); 1439 } 1440 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), 1441 VTy->getElementCount()); 1442 } 1443 1444 return getValueType(DL, Ty, AllowUnknown); 1445 } 1446 1447 1448 /// Return the MVT corresponding to this LLVM type. See getValueType. 1449 MVT getSimpleValueType(const DataLayout &DL, Type *Ty, 1450 bool AllowUnknown = false) const { 1451 return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); 1452 } 1453 1454 /// Return the desired alignment for ByVal or InAlloca aggregate function 1455 /// arguments in the caller parameter area. This is the actual alignment, not 1456 /// its logarithm. 1457 virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; 1458 1459 /// Return the type of registers that this ValueType will eventually require. getRegisterType(MVT VT)1460 MVT getRegisterType(MVT VT) const { 1461 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); 1462 return RegisterTypeForVT[VT.SimpleTy]; 1463 } 1464 1465 /// Return the type of registers that this ValueType will eventually require. getRegisterType(LLVMContext & Context,EVT VT)1466 MVT getRegisterType(LLVMContext &Context, EVT VT) const { 1467 if (VT.isSimple()) { 1468 assert((unsigned)VT.getSimpleVT().SimpleTy < 1469 array_lengthof(RegisterTypeForVT)); 1470 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; 1471 } 1472 if (VT.isVector()) { 1473 EVT VT1; 1474 MVT RegisterVT; 1475 unsigned NumIntermediates; 1476 (void)getVectorTypeBreakdown(Context, VT, VT1, 1477 NumIntermediates, RegisterVT); 1478 return RegisterVT; 1479 } 1480 if (VT.isInteger()) { 1481 return getRegisterType(Context, getTypeToTransformTo(Context, VT)); 1482 } 1483 llvm_unreachable("Unsupported extended type!"); 1484 } 1485 1486 /// Return the number of registers that this ValueType will eventually 1487 /// require. 1488 /// 1489 /// This is one for any types promoted to live in larger registers, but may be 1490 /// more than one for types (like i64) that are split into pieces. For types 1491 /// like i140, which are first promoted then expanded, it is the number of 1492 /// registers needed to hold all the bits of the original type. For an i140 1493 /// on a 32 bit machine this means 5 registers. 1494 /// 1495 /// RegisterVT may be passed as a way to override the default settings, for 1496 /// instance with i128 inline assembly operands on SystemZ. 1497 virtual unsigned 1498 getNumRegisters(LLVMContext &Context, EVT VT, 1499 Optional<MVT> RegisterVT = None) const { 1500 if (VT.isSimple()) { 1501 assert((unsigned)VT.getSimpleVT().SimpleTy < 1502 array_lengthof(NumRegistersForVT)); 1503 return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; 1504 } 1505 if (VT.isVector()) { 1506 EVT VT1; 1507 MVT VT2; 1508 unsigned NumIntermediates; 1509 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); 1510 } 1511 if (VT.isInteger()) { 1512 unsigned BitWidth = VT.getSizeInBits(); 1513 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); 1514 return (BitWidth + RegWidth - 1) / RegWidth; 1515 } 1516 llvm_unreachable("Unsupported extended type!"); 1517 } 1518 1519 /// Certain combinations of ABIs, Targets and features require that types 1520 /// are legal for some operations and not for other operations. 1521 /// For MIPS all vector types must be passed through the integer register set. getRegisterTypeForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT)1522 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, 1523 CallingConv::ID CC, EVT VT) const { 1524 return getRegisterType(Context, VT); 1525 } 1526 1527 /// Certain targets require unusual breakdowns of certain types. For MIPS, 1528 /// this occurs when a vector type is used, as vector are passed through the 1529 /// integer register set. getNumRegistersForCallingConv(LLVMContext & Context,CallingConv::ID CC,EVT VT)1530 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, 1531 CallingConv::ID CC, 1532 EVT VT) const { 1533 return getNumRegisters(Context, VT); 1534 } 1535 1536 /// Certain targets have context sensitive alignment requirements, where one 1537 /// type has the alignment requirement of another type. getABIAlignmentForCallingConv(Type * ArgTy,const DataLayout & DL)1538 virtual Align getABIAlignmentForCallingConv(Type *ArgTy, 1539 const DataLayout &DL) const { 1540 return DL.getABITypeAlign(ArgTy); 1541 } 1542 1543 /// If true, then instruction selection should seek to shrink the FP constant 1544 /// of the specified type to a smaller type in order to save space and / or 1545 /// reduce runtime. ShouldShrinkFPConstant(EVT)1546 virtual bool ShouldShrinkFPConstant(EVT) const { return true; } 1547 1548 /// Return true if it is profitable to reduce a load to a smaller type. 1549 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x shouldReduceLoadWidth(SDNode * Load,ISD::LoadExtType ExtTy,EVT NewVT)1550 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, 1551 EVT NewVT) const { 1552 // By default, assume that it is cheaper to extract a subvector from a wide 1553 // vector load rather than creating multiple narrow vector loads. 1554 if (NewVT.isVector() && !Load->hasOneUse()) 1555 return false; 1556 1557 return true; 1558 } 1559 1560 /// When splitting a value of the specified type into parts, does the Lo 1561 /// or Hi part come first? This usually follows the endianness, except 1562 /// for ppcf128, where the Hi part always comes first. hasBigEndianPartOrdering(EVT VT,const DataLayout & DL)1563 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { 1564 return DL.isBigEndian() || VT == MVT::ppcf128; 1565 } 1566 1567 /// If true, the target has custom DAG combine transformations that it can 1568 /// perform for the specified node. hasTargetDAGCombine(ISD::NodeType NT)1569 bool hasTargetDAGCombine(ISD::NodeType NT) const { 1570 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); 1571 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); 1572 } 1573 getGatherAllAliasesMaxDepth()1574 unsigned getGatherAllAliasesMaxDepth() const { 1575 return GatherAllAliasesMaxDepth; 1576 } 1577 1578 /// Returns the size of the platform's va_list object. getVaListSizeInBits(const DataLayout & DL)1579 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { 1580 return getPointerTy(DL).getSizeInBits(); 1581 } 1582 1583 /// Get maximum # of store operations permitted for llvm.memset 1584 /// 1585 /// This function returns the maximum number of store operations permitted 1586 /// to replace a call to llvm.memset. The value is set by the target at the 1587 /// performance threshold for such a replacement. If OptSize is true, 1588 /// return the limit for functions that have OptSize attribute. getMaxStoresPerMemset(bool OptSize)1589 unsigned getMaxStoresPerMemset(bool OptSize) const { 1590 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; 1591 } 1592 1593 /// Get maximum # of store operations permitted for llvm.memcpy 1594 /// 1595 /// This function returns the maximum number of store operations permitted 1596 /// to replace a call to llvm.memcpy. The value is set by the target at the 1597 /// performance threshold for such a replacement. If OptSize is true, 1598 /// return the limit for functions that have OptSize attribute. getMaxStoresPerMemcpy(bool OptSize)1599 unsigned getMaxStoresPerMemcpy(bool OptSize) const { 1600 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; 1601 } 1602 1603 /// \brief Get maximum # of store operations to be glued together 1604 /// 1605 /// This function returns the maximum number of store operations permitted 1606 /// to glue together during lowering of llvm.memcpy. The value is set by 1607 // the target at the performance threshold for such a replacement. getMaxGluedStoresPerMemcpy()1608 virtual unsigned getMaxGluedStoresPerMemcpy() const { 1609 return MaxGluedStoresPerMemcpy; 1610 } 1611 1612 /// Get maximum # of load operations permitted for memcmp 1613 /// 1614 /// This function returns the maximum number of load operations permitted 1615 /// to replace a call to memcmp. The value is set by the target at the 1616 /// performance threshold for such a replacement. If OptSize is true, 1617 /// return the limit for functions that have OptSize attribute. getMaxExpandSizeMemcmp(bool OptSize)1618 unsigned getMaxExpandSizeMemcmp(bool OptSize) const { 1619 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; 1620 } 1621 1622 /// Get maximum # of store operations permitted for llvm.memmove 1623 /// 1624 /// This function returns the maximum number of store operations permitted 1625 /// to replace a call to llvm.memmove. The value is set by the target at the 1626 /// performance threshold for such a replacement. If OptSize is true, 1627 /// return the limit for functions that have OptSize attribute. getMaxStoresPerMemmove(bool OptSize)1628 unsigned getMaxStoresPerMemmove(bool OptSize) const { 1629 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; 1630 } 1631 1632 /// Determine if the target supports unaligned memory accesses. 1633 /// 1634 /// This function returns true if the target allows unaligned memory accesses 1635 /// of the specified type in the given address space. If true, it also returns 1636 /// whether the unaligned memory access is "fast" in the last argument by 1637 /// reference. This is used, for example, in situations where an array 1638 /// copy/move/set is converted to a sequence of store operations. Its use 1639 /// helps to ensure that such replacements don't generate code that causes an 1640 /// alignment error (trap) on the target machine. 1641 virtual bool allowsMisalignedMemoryAccesses( 1642 EVT, unsigned AddrSpace = 0, Align Alignment = Align(1), 1643 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1644 bool * /*Fast*/ = nullptr) const { 1645 return false; 1646 } 1647 1648 /// LLT handling variant. 1649 virtual bool allowsMisalignedMemoryAccesses( 1650 LLT, unsigned AddrSpace = 0, Align Alignment = Align(1), 1651 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1652 bool * /*Fast*/ = nullptr) const { 1653 return false; 1654 } 1655 1656 /// This function returns true if the memory access is aligned or if the 1657 /// target allows this specific unaligned memory access. If the access is 1658 /// allowed, the optional final parameter returns if the access is also fast 1659 /// (as defined by the target). 1660 bool allowsMemoryAccessForAlignment( 1661 LLVMContext &Context, const DataLayout &DL, EVT VT, 1662 unsigned AddrSpace = 0, Align Alignment = Align(1), 1663 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1664 bool *Fast = nullptr) const; 1665 1666 /// Return true if the memory access of this type is aligned or if the target 1667 /// allows this specific unaligned access for the given MachineMemOperand. 1668 /// If the access is allowed, the optional final parameter returns if the 1669 /// access is also fast (as defined by the target). 1670 bool allowsMemoryAccessForAlignment(LLVMContext &Context, 1671 const DataLayout &DL, EVT VT, 1672 const MachineMemOperand &MMO, 1673 bool *Fast = nullptr) const; 1674 1675 /// Return true if the target supports a memory access of this type for the 1676 /// given address space and alignment. If the access is allowed, the optional 1677 /// final parameter returns if the access is also fast (as defined by the 1678 /// target). 1679 virtual bool 1680 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, 1681 unsigned AddrSpace = 0, Align Alignment = Align(1), 1682 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1683 bool *Fast = nullptr) const; 1684 1685 /// Return true if the target supports a memory access of this type for the 1686 /// given MachineMemOperand. If the access is allowed, the optional 1687 /// final parameter returns if the access is also fast (as defined by the 1688 /// target). 1689 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, 1690 const MachineMemOperand &MMO, 1691 bool *Fast = nullptr) const; 1692 1693 /// LLT handling variant. 1694 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, 1695 const MachineMemOperand &MMO, 1696 bool *Fast = nullptr) const; 1697 1698 /// Returns the target specific optimal type for load and store operations as 1699 /// a result of memset, memcpy, and memmove lowering. 1700 /// It returns EVT::Other if the type should be determined using generic 1701 /// target-independent logic. 1702 virtual EVT getOptimalMemOpType(const MemOp & Op,const AttributeList &)1703 getOptimalMemOpType(const MemOp &Op, 1704 const AttributeList & /*FuncAttributes*/) const { 1705 return MVT::Other; 1706 } 1707 1708 /// LLT returning variant. 1709 virtual LLT getOptimalMemOpLLT(const MemOp & Op,const AttributeList &)1710 getOptimalMemOpLLT(const MemOp &Op, 1711 const AttributeList & /*FuncAttributes*/) const { 1712 return LLT(); 1713 } 1714 1715 /// Returns true if it's safe to use load / store of the specified type to 1716 /// expand memcpy / memset inline. 1717 /// 1718 /// This is mostly true for all types except for some special cases. For 1719 /// example, on X86 targets without SSE2 f64 load / store are done with fldl / 1720 /// fstpl which also does type conversion. Note the specified type doesn't 1721 /// have to be legal as the hook is used before type legalization. isSafeMemOpType(MVT)1722 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } 1723 1724 /// Return lower limit for number of blocks in a jump table. 1725 virtual unsigned getMinimumJumpTableEntries() const; 1726 1727 /// Return lower limit of the density in a jump table. 1728 unsigned getMinimumJumpTableDensity(bool OptForSize) const; 1729 1730 /// Return upper limit for number of entries in a jump table. 1731 /// Zero if no limit. 1732 unsigned getMaximumJumpTableSize() const; 1733 1734 virtual bool isJumpTableRelative() const; 1735 1736 /// If a physical register, this specifies the register that 1737 /// llvm.savestack/llvm.restorestack should save and restore. getStackPointerRegisterToSaveRestore()1738 Register getStackPointerRegisterToSaveRestore() const { 1739 return StackPointerRegisterToSaveRestore; 1740 } 1741 1742 /// If a physical register, this returns the register that receives the 1743 /// exception address on entry to an EH pad. 1744 virtual Register getExceptionPointerRegister(const Constant * PersonalityFn)1745 getExceptionPointerRegister(const Constant *PersonalityFn) const { 1746 return Register(); 1747 } 1748 1749 /// If a physical register, this returns the register that receives the 1750 /// exception typeid on entry to a landing pad. 1751 virtual Register getExceptionSelectorRegister(const Constant * PersonalityFn)1752 getExceptionSelectorRegister(const Constant *PersonalityFn) const { 1753 return Register(); 1754 } 1755 needsFixedCatchObjects()1756 virtual bool needsFixedCatchObjects() const { 1757 report_fatal_error("Funclet EH is not implemented for this target"); 1758 } 1759 1760 /// Return the minimum stack alignment of an argument. getMinStackArgumentAlignment()1761 Align getMinStackArgumentAlignment() const { 1762 return MinStackArgumentAlignment; 1763 } 1764 1765 /// Return the minimum function alignment. getMinFunctionAlignment()1766 Align getMinFunctionAlignment() const { return MinFunctionAlignment; } 1767 1768 /// Return the preferred function alignment. getPrefFunctionAlignment()1769 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } 1770 1771 /// Return the preferred loop alignment. 1772 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const; 1773 1774 /// Should loops be aligned even when the function is marked OptSize (but not 1775 /// MinSize). alignLoopsWithOptSize()1776 virtual bool alignLoopsWithOptSize() const { 1777 return false; 1778 } 1779 1780 /// If the target has a standard location for the stack protector guard, 1781 /// returns the address of that location. Otherwise, returns nullptr. 1782 /// DEPRECATED: please override useLoadStackGuardNode and customize 1783 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). 1784 virtual Value *getIRStackGuard(IRBuilderBase &IRB) const; 1785 1786 /// Inserts necessary declarations for SSP (stack protection) purpose. 1787 /// Should be used only when getIRStackGuard returns nullptr. 1788 virtual void insertSSPDeclarations(Module &M) const; 1789 1790 /// Return the variable that's previously inserted by insertSSPDeclarations, 1791 /// if any, otherwise return nullptr. Should be used only when 1792 /// getIRStackGuard returns nullptr. 1793 virtual Value *getSDagStackGuard(const Module &M) const; 1794 1795 /// If this function returns true, stack protection checks should XOR the 1796 /// frame pointer (or whichever pointer is used to address locals) into the 1797 /// stack guard value before checking it. getIRStackGuard must return nullptr 1798 /// if this returns true. useStackGuardXorFP()1799 virtual bool useStackGuardXorFP() const { return false; } 1800 1801 /// If the target has a standard stack protection check function that 1802 /// performs validation and error handling, returns the function. Otherwise, 1803 /// returns nullptr. Must be previously inserted by insertSSPDeclarations. 1804 /// Should be used only when getIRStackGuard returns nullptr. 1805 virtual Function *getSSPStackGuardCheck(const Module &M) const; 1806 1807 /// \returns true if a constant G_UBFX is legal on the target. isConstantUnsignedBitfieldExtactLegal(unsigned Opc,LLT Ty1,LLT Ty2)1808 virtual bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1, 1809 LLT Ty2) const { 1810 return false; 1811 } 1812 1813 protected: 1814 Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, 1815 bool UseTLS) const; 1816 1817 public: 1818 /// Returns the target-specific address of the unsafe stack pointer. 1819 virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const; 1820 1821 /// Returns the name of the symbol used to emit stack probes or the empty 1822 /// string if not applicable. hasStackProbeSymbol(MachineFunction & MF)1823 virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; } 1824 hasInlineStackProbe(MachineFunction & MF)1825 virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; } 1826 getStackProbeSymbolName(MachineFunction & MF)1827 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { 1828 return ""; 1829 } 1830 1831 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we 1832 /// are happy to sink it into basic blocks. A cast may be free, but not 1833 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. 1834 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; 1835 1836 /// Return true if the pointer arguments to CI should be aligned by aligning 1837 /// the object whose address is being passed. If so then MinSize is set to the 1838 /// minimum size the object must be to be aligned and PrefAlign is set to the 1839 /// preferred alignment. shouldAlignPointerArgs(CallInst *,unsigned &,unsigned &)1840 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, 1841 unsigned & /*PrefAlign*/) const { 1842 return false; 1843 } 1844 1845 //===--------------------------------------------------------------------===// 1846 /// \name Helpers for TargetTransformInfo implementations 1847 /// @{ 1848 1849 /// Get the ISD node that corresponds to the Instruction class opcode. 1850 int InstructionOpcodeToISD(unsigned Opcode) const; 1851 1852 /// Estimate the cost of type-legalization and the legalized type. 1853 std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL, 1854 Type *Ty) const; 1855 1856 /// @} 1857 1858 //===--------------------------------------------------------------------===// 1859 /// \name Helpers for atomic expansion. 1860 /// @{ 1861 1862 /// Returns the maximum atomic operation size (in bits) supported by 1863 /// the backend. Atomic operations greater than this size (as well 1864 /// as ones that are not naturally aligned), will be expanded by 1865 /// AtomicExpandPass into an __atomic_* library call. getMaxAtomicSizeInBitsSupported()1866 unsigned getMaxAtomicSizeInBitsSupported() const { 1867 return MaxAtomicSizeInBitsSupported; 1868 } 1869 1870 /// Returns the size of the smallest cmpxchg or ll/sc instruction 1871 /// the backend supports. Any smaller operations are widened in 1872 /// AtomicExpandPass. 1873 /// 1874 /// Note that *unlike* operations above the maximum size, atomic ops 1875 /// are still natively supported below the minimum; they just 1876 /// require a more complex expansion. getMinCmpXchgSizeInBits()1877 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } 1878 1879 /// Whether the target supports unaligned atomic operations. supportsUnalignedAtomics()1880 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } 1881 1882 /// Whether AtomicExpandPass should automatically insert fences and reduce 1883 /// ordering for this atomic. This should be true for most architectures with 1884 /// weak memory ordering. Defaults to false. shouldInsertFencesForAtomic(const Instruction * I)1885 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { 1886 return false; 1887 } 1888 1889 /// Perform a load-linked operation on Addr, returning a "Value *" with the 1890 /// corresponding pointee type. This may entail some non-trivial operations to 1891 /// truncate or reconstruct types that will be illegal in the backend. See 1892 /// ARMISelLowering for an example implementation. emitLoadLinked(IRBuilderBase & Builder,Type * ValueTy,Value * Addr,AtomicOrdering Ord)1893 virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, 1894 Value *Addr, AtomicOrdering Ord) const { 1895 llvm_unreachable("Load linked unimplemented on this target"); 1896 } 1897 1898 /// Perform a store-conditional operation to Addr. Return the status of the 1899 /// store. This should be 0 if the store succeeded, non-zero otherwise. emitStoreConditional(IRBuilderBase & Builder,Value * Val,Value * Addr,AtomicOrdering Ord)1900 virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, 1901 Value *Addr, AtomicOrdering Ord) const { 1902 llvm_unreachable("Store conditional unimplemented on this target"); 1903 } 1904 1905 /// Perform a masked atomicrmw using a target-specific intrinsic. This 1906 /// represents the core LL/SC loop which will be lowered at a late stage by 1907 /// the backend. emitMaskedAtomicRMWIntrinsic(IRBuilderBase & Builder,AtomicRMWInst * AI,Value * AlignedAddr,Value * Incr,Value * Mask,Value * ShiftAmt,AtomicOrdering Ord)1908 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, 1909 AtomicRMWInst *AI, 1910 Value *AlignedAddr, Value *Incr, 1911 Value *Mask, Value *ShiftAmt, 1912 AtomicOrdering Ord) const { 1913 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); 1914 } 1915 1916 /// Perform a masked cmpxchg using a target-specific intrinsic. This 1917 /// represents the core LL/SC loop which will be lowered at a late stage by 1918 /// the backend. emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase & Builder,AtomicCmpXchgInst * CI,Value * AlignedAddr,Value * CmpVal,Value * NewVal,Value * Mask,AtomicOrdering Ord)1919 virtual Value *emitMaskedAtomicCmpXchgIntrinsic( 1920 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 1921 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 1922 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target"); 1923 } 1924 1925 /// Inserts in the IR a target-specific intrinsic specifying a fence. 1926 /// It is called by AtomicExpandPass before expanding an 1927 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad 1928 /// if shouldInsertFencesForAtomic returns true. 1929 /// 1930 /// Inst is the original atomic instruction, prior to other expansions that 1931 /// may be performed. 1932 /// 1933 /// This function should either return a nullptr, or a pointer to an IR-level 1934 /// Instruction*. Even complex fence sequences can be represented by a 1935 /// single Instruction* through an intrinsic to be lowered later. 1936 /// Backends should override this method to produce target-specific intrinsic 1937 /// for their fences. 1938 /// FIXME: Please note that the default implementation here in terms of 1939 /// IR-level fences exists for historical/compatibility reasons and is 1940 /// *unsound* ! Fences cannot, in general, be used to restore sequential 1941 /// consistency. For example, consider the following example: 1942 /// atomic<int> x = y = 0; 1943 /// int r1, r2, r3, r4; 1944 /// Thread 0: 1945 /// x.store(1); 1946 /// Thread 1: 1947 /// y.store(1); 1948 /// Thread 2: 1949 /// r1 = x.load(); 1950 /// r2 = y.load(); 1951 /// Thread 3: 1952 /// r3 = y.load(); 1953 /// r4 = x.load(); 1954 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all 1955 /// seq_cst. But if they are lowered to monotonic accesses, no amount of 1956 /// IR-level fences can prevent it. 1957 /// @{ 1958 virtual Instruction *emitLeadingFence(IRBuilderBase &Builder, 1959 Instruction *Inst, 1960 AtomicOrdering Ord) const; 1961 1962 virtual Instruction *emitTrailingFence(IRBuilderBase &Builder, 1963 Instruction *Inst, 1964 AtomicOrdering Ord) const; 1965 /// @} 1966 1967 // Emits code that executes when the comparison result in the ll/sc 1968 // expansion of a cmpxchg instruction is such that the store-conditional will 1969 // not execute. This makes it possible to balance out the load-linked with 1970 // a dedicated instruction, if desired. 1971 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would 1972 // be unnecessarily held, except if clrex, inserted by this hook, is executed. emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase & Builder)1973 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {} 1974 1975 /// Returns true if the given (atomic) store should be expanded by the 1976 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. shouldExpandAtomicStoreInIR(StoreInst * SI)1977 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { 1978 return false; 1979 } 1980 1981 /// Returns true if arguments should be sign-extended in lib calls. shouldSignExtendTypeInLibCall(EVT Type,bool IsSigned)1982 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 1983 return IsSigned; 1984 } 1985 1986 /// Returns true if arguments should be extended in lib calls. shouldExtendTypeInLibCall(EVT Type)1987 virtual bool shouldExtendTypeInLibCall(EVT Type) const { 1988 return true; 1989 } 1990 1991 /// Returns how the given (atomic) load should be expanded by the 1992 /// IR-level AtomicExpand pass. shouldExpandAtomicLoadInIR(LoadInst * LI)1993 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { 1994 return AtomicExpansionKind::None; 1995 } 1996 1997 /// Returns how the given atomic cmpxchg should be expanded by the IR-level 1998 /// AtomicExpand pass. 1999 virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst * AI)2000 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { 2001 return AtomicExpansionKind::None; 2002 } 2003 2004 /// Returns how the IR-level AtomicExpand pass should expand the given 2005 /// AtomicRMW, if at all. Default is to never expand. shouldExpandAtomicRMWInIR(AtomicRMWInst * RMW)2006 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 2007 return RMW->isFloatingPointOperation() ? 2008 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; 2009 } 2010 2011 /// On some platforms, an AtomicRMW that never actually modifies the value 2012 /// (such as fetch_add of 0) can be turned into a fence followed by an 2013 /// atomic load. This may sound useless, but it makes it possible for the 2014 /// processor to keep the cacheline shared, dramatically improving 2015 /// performance. And such idempotent RMWs are useful for implementing some 2016 /// kinds of locks, see for example (justification + benchmarks): 2017 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf 2018 /// This method tries doing that transformation, returning the atomic load if 2019 /// it succeeds, and nullptr otherwise. 2020 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo 2021 /// another round of expansion. 2022 virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst * RMWI)2023 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { 2024 return nullptr; 2025 } 2026 2027 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, 2028 /// SIGN_EXTEND, or ANY_EXTEND). getExtendForAtomicOps()2029 virtual ISD::NodeType getExtendForAtomicOps() const { 2030 return ISD::ZERO_EXTEND; 2031 } 2032 2033 /// Returns how the platform's atomic compare and swap expects its comparison 2034 /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is 2035 /// separate from getExtendForAtomicOps, which is concerned with the 2036 /// sign-extension of the instruction's output, whereas here we are concerned 2037 /// with the sign-extension of the input. For targets with compare-and-swap 2038 /// instructions (or sub-word comparisons in their LL/SC loop expansions), 2039 /// the input can be ANY_EXTEND, but the output will still have a specific 2040 /// extension. getExtendForAtomicCmpSwapArg()2041 virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const { 2042 return ISD::ANY_EXTEND; 2043 } 2044 2045 /// @} 2046 2047 /// Returns true if we should normalize 2048 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and 2049 /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely 2050 /// that it saves us from materializing N0 and N1 in an integer register. 2051 /// Targets that are able to perform and/or on flags should return false here. shouldNormalizeToSelectSequence(LLVMContext & Context,EVT VT)2052 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, 2053 EVT VT) const { 2054 // If a target has multiple condition registers, then it likely has logical 2055 // operations on those registers. 2056 if (hasMultipleConditionRegisters()) 2057 return false; 2058 // Only do the transform if the value won't be split into multiple 2059 // registers. 2060 LegalizeTypeAction Action = getTypeAction(Context, VT); 2061 return Action != TypeExpandInteger && Action != TypeExpandFloat && 2062 Action != TypeSplitVector; 2063 } 2064 isProfitableToCombineMinNumMaxNum(EVT VT)2065 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } 2066 2067 /// Return true if a select of constants (select Cond, C1, C2) should be 2068 /// transformed into simple math ops with the condition value. For example: 2069 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 convertSelectOfConstantsToMath(EVT VT)2070 virtual bool convertSelectOfConstantsToMath(EVT VT) const { 2071 return false; 2072 } 2073 2074 /// Return true if it is profitable to transform an integer 2075 /// multiplication-by-constant into simpler operations like shifts and adds. 2076 /// This may be true if the target does not directly support the 2077 /// multiplication operation for the specified type or the sequence of simpler 2078 /// ops is faster than the multiply. decomposeMulByConstant(LLVMContext & Context,EVT VT,SDValue C)2079 virtual bool decomposeMulByConstant(LLVMContext &Context, 2080 EVT VT, SDValue C) const { 2081 return false; 2082 } 2083 2084 /// Return true if it may be profitable to transform 2085 /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 2086 /// This may not be true if c1 and c2 can be represented as immediates but 2087 /// c1*c2 cannot, for example. 2088 /// The target should check if c1, c2 and c1*c2 can be represented as 2089 /// immediates, or have to be materialized into registers. If it is not sure 2090 /// about some cases, a default true can be returned to let the DAGCombiner 2091 /// decide. 2092 /// AddNode is (add x, c1), and ConstNode is c2. isMulAddWithConstProfitable(const SDValue & AddNode,const SDValue & ConstNode)2093 virtual bool isMulAddWithConstProfitable(const SDValue &AddNode, 2094 const SDValue &ConstNode) const { 2095 return true; 2096 } 2097 2098 /// Return true if it is more correct/profitable to use strict FP_TO_INT 2099 /// conversion operations - canonicalizing the FP source value instead of 2100 /// converting all cases and then selecting based on value. 2101 /// This may be true if the target throws exceptions for out of bounds 2102 /// conversions or has fast FP CMOV. shouldUseStrictFP_TO_INT(EVT FpVT,EVT IntVT,bool IsSigned)2103 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, 2104 bool IsSigned) const { 2105 return false; 2106 } 2107 2108 //===--------------------------------------------------------------------===// 2109 // TargetLowering Configuration Methods - These methods should be invoked by 2110 // the derived class constructor to configure this object for the target. 2111 // 2112 protected: 2113 /// Specify how the target extends the result of integer and floating point 2114 /// boolean values from i1 to a wider type. See getBooleanContents. setBooleanContents(BooleanContent Ty)2115 void setBooleanContents(BooleanContent Ty) { 2116 BooleanContents = Ty; 2117 BooleanFloatContents = Ty; 2118 } 2119 2120 /// Specify how the target extends the result of integer and floating point 2121 /// boolean values from i1 to a wider type. See getBooleanContents. setBooleanContents(BooleanContent IntTy,BooleanContent FloatTy)2122 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { 2123 BooleanContents = IntTy; 2124 BooleanFloatContents = FloatTy; 2125 } 2126 2127 /// Specify how the target extends the result of a vector boolean value from a 2128 /// vector of i1 to a wider type. See getBooleanContents. setBooleanVectorContents(BooleanContent Ty)2129 void setBooleanVectorContents(BooleanContent Ty) { 2130 BooleanVectorContents = Ty; 2131 } 2132 2133 /// Specify the target scheduling preference. setSchedulingPreference(Sched::Preference Pref)2134 void setSchedulingPreference(Sched::Preference Pref) { 2135 SchedPreferenceInfo = Pref; 2136 } 2137 2138 /// Indicate the minimum number of blocks to generate jump tables. 2139 void setMinimumJumpTableEntries(unsigned Val); 2140 2141 /// Indicate the maximum number of entries in jump tables. 2142 /// Set to zero to generate unlimited jump tables. 2143 void setMaximumJumpTableSize(unsigned); 2144 2145 /// If set to a physical register, this specifies the register that 2146 /// llvm.savestack/llvm.restorestack should save and restore. setStackPointerRegisterToSaveRestore(Register R)2147 void setStackPointerRegisterToSaveRestore(Register R) { 2148 StackPointerRegisterToSaveRestore = R; 2149 } 2150 2151 /// Tells the code generator that the target has multiple (allocatable) 2152 /// condition registers that can be used to store the results of comparisons 2153 /// for use by selects and conditional branches. With multiple condition 2154 /// registers, the code generator will not aggressively sink comparisons into 2155 /// the blocks of their users. 2156 void setHasMultipleConditionRegisters(bool hasManyRegs = true) { 2157 HasMultipleConditionRegisters = hasManyRegs; 2158 } 2159 2160 /// Tells the code generator that the target has BitExtract instructions. 2161 /// The code generator will aggressively sink "shift"s into the blocks of 2162 /// their users if the users will generate "and" instructions which can be 2163 /// combined with "shift" to BitExtract instructions. 2164 void setHasExtractBitsInsn(bool hasExtractInsn = true) { 2165 HasExtractBitsInsn = hasExtractInsn; 2166 } 2167 2168 /// Tells the code generator not to expand logic operations on comparison 2169 /// predicates into separate sequences that increase the amount of flow 2170 /// control. 2171 void setJumpIsExpensive(bool isExpensive = true); 2172 2173 /// Tells the code generator which bitwidths to bypass. addBypassSlowDiv(unsigned int SlowBitWidth,unsigned int FastBitWidth)2174 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { 2175 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; 2176 } 2177 2178 /// Add the specified register class as an available regclass for the 2179 /// specified value type. This indicates the selector can handle values of 2180 /// that class natively. addRegisterClass(MVT VT,const TargetRegisterClass * RC)2181 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { 2182 assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT)); 2183 RegClassForVT[VT.SimpleTy] = RC; 2184 } 2185 2186 /// Return the largest legal super-reg register class of the register class 2187 /// for the specified type and its associated "cost". 2188 virtual std::pair<const TargetRegisterClass *, uint8_t> 2189 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; 2190 2191 /// Once all of the register classes are added, this allows us to compute 2192 /// derived properties we expose. 2193 void computeRegisterProperties(const TargetRegisterInfo *TRI); 2194 2195 /// Indicate that the specified operation does not work with the specified 2196 /// type and indicate what to do about it. Note that VT may refer to either 2197 /// the type of a result or that of an operand of Op. setOperationAction(unsigned Op,MVT VT,LegalizeAction Action)2198 void setOperationAction(unsigned Op, MVT VT, 2199 LegalizeAction Action) { 2200 assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); 2201 OpActions[(unsigned)VT.SimpleTy][Op] = Action; 2202 } 2203 2204 /// Indicate that the specified load with extension does not work with the 2205 /// specified type and indicate what to do about it. setLoadExtAction(unsigned ExtType,MVT ValVT,MVT MemVT,LegalizeAction Action)2206 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, 2207 LegalizeAction Action) { 2208 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && 2209 MemVT.isValid() && "Table isn't big enough!"); 2210 assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); 2211 unsigned Shift = 4 * ExtType; 2212 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); 2213 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; 2214 } 2215 2216 /// Indicate that the specified truncating store does not work with the 2217 /// specified type and indicate what to do about it. setTruncStoreAction(MVT ValVT,MVT MemVT,LegalizeAction Action)2218 void setTruncStoreAction(MVT ValVT, MVT MemVT, 2219 LegalizeAction Action) { 2220 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); 2221 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; 2222 } 2223 2224 /// Indicate that the specified indexed load does or does not work with the 2225 /// specified type and indicate what to do abort it. 2226 /// 2227 /// NOTE: All indexed mode loads are initialized to Expand in 2228 /// TargetLowering.cpp setIndexedLoadAction(unsigned IdxMode,MVT VT,LegalizeAction Action)2229 void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { 2230 setIndexedModeAction(IdxMode, VT, IMAB_Load, Action); 2231 } 2232 2233 /// Indicate that the specified indexed store does or does not work with the 2234 /// specified type and indicate what to do about it. 2235 /// 2236 /// NOTE: All indexed mode stores are initialized to Expand in 2237 /// TargetLowering.cpp setIndexedStoreAction(unsigned IdxMode,MVT VT,LegalizeAction Action)2238 void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { 2239 setIndexedModeAction(IdxMode, VT, IMAB_Store, Action); 2240 } 2241 2242 /// Indicate that the specified indexed masked load does or does not work with 2243 /// the specified type and indicate what to do about it. 2244 /// 2245 /// NOTE: All indexed mode masked loads are initialized to Expand in 2246 /// TargetLowering.cpp setIndexedMaskedLoadAction(unsigned IdxMode,MVT VT,LegalizeAction Action)2247 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, 2248 LegalizeAction Action) { 2249 setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action); 2250 } 2251 2252 /// Indicate that the specified indexed masked store does or does not work 2253 /// with the specified type and indicate what to do about it. 2254 /// 2255 /// NOTE: All indexed mode masked stores are initialized to Expand in 2256 /// TargetLowering.cpp setIndexedMaskedStoreAction(unsigned IdxMode,MVT VT,LegalizeAction Action)2257 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, 2258 LegalizeAction Action) { 2259 setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action); 2260 } 2261 2262 /// Indicate that the specified condition code is or isn't supported on the 2263 /// target and indicate what to do about it. setCondCodeAction(ISD::CondCode CC,MVT VT,LegalizeAction Action)2264 void setCondCodeAction(ISD::CondCode CC, MVT VT, 2265 LegalizeAction Action) { 2266 assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && 2267 "Table isn't big enough!"); 2268 assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); 2269 /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit 2270 /// value and the upper 29 bits index into the second dimension of the array 2271 /// to select what 32-bit value to use. 2272 uint32_t Shift = 4 * (VT.SimpleTy & 0x7); 2273 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); 2274 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; 2275 } 2276 2277 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults 2278 /// to trying a larger integer/fp until it can find one that works. If that 2279 /// default is insufficient, this method can be used by the target to override 2280 /// the default. AddPromotedToType(unsigned Opc,MVT OrigVT,MVT DestVT)2281 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { 2282 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; 2283 } 2284 2285 /// Convenience method to set an operation to Promote and specify the type 2286 /// in a single call. setOperationPromotedToType(unsigned Opc,MVT OrigVT,MVT DestVT)2287 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { 2288 setOperationAction(Opc, OrigVT, Promote); 2289 AddPromotedToType(Opc, OrigVT, DestVT); 2290 } 2291 2292 /// Targets should invoke this method for each target independent node that 2293 /// they want to provide a custom DAG combiner for by implementing the 2294 /// PerformDAGCombine virtual method. setTargetDAGCombine(ISD::NodeType NT)2295 void setTargetDAGCombine(ISD::NodeType NT) { 2296 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); 2297 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); 2298 } 2299 2300 /// Set the target's minimum function alignment. setMinFunctionAlignment(Align Alignment)2301 void setMinFunctionAlignment(Align Alignment) { 2302 MinFunctionAlignment = Alignment; 2303 } 2304 2305 /// Set the target's preferred function alignment. This should be set if 2306 /// there is a performance benefit to higher-than-minimum alignment setPrefFunctionAlignment(Align Alignment)2307 void setPrefFunctionAlignment(Align Alignment) { 2308 PrefFunctionAlignment = Alignment; 2309 } 2310 2311 /// Set the target's preferred loop alignment. Default alignment is one, it 2312 /// means the target does not care about loop alignment. The target may also 2313 /// override getPrefLoopAlignment to provide per-loop values. setPrefLoopAlignment(Align Alignment)2314 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } 2315 2316 /// Set the minimum stack alignment of an argument. setMinStackArgumentAlignment(Align Alignment)2317 void setMinStackArgumentAlignment(Align Alignment) { 2318 MinStackArgumentAlignment = Alignment; 2319 } 2320 2321 /// Set the maximum atomic operation size supported by the 2322 /// backend. Atomic operations greater than this size (as well as 2323 /// ones that are not naturally aligned), will be expanded by 2324 /// AtomicExpandPass into an __atomic_* library call. setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)2325 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { 2326 MaxAtomicSizeInBitsSupported = SizeInBits; 2327 } 2328 2329 /// Sets the minimum cmpxchg or ll/sc size supported by the backend. setMinCmpXchgSizeInBits(unsigned SizeInBits)2330 void setMinCmpXchgSizeInBits(unsigned SizeInBits) { 2331 MinCmpXchgSizeInBits = SizeInBits; 2332 } 2333 2334 /// Sets whether unaligned atomic operations are supported. setSupportsUnalignedAtomics(bool UnalignedSupported)2335 void setSupportsUnalignedAtomics(bool UnalignedSupported) { 2336 SupportsUnalignedAtomics = UnalignedSupported; 2337 } 2338 2339 public: 2340 //===--------------------------------------------------------------------===// 2341 // Addressing mode description hooks (used by LSR etc). 2342 // 2343 2344 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store 2345 /// instructions reading the address. This allows as much computation as 2346 /// possible to be done in the address mode for that operand. This hook lets 2347 /// targets also pass back when this should be done on intrinsics which 2348 /// load/store. getAddrModeArguments(IntrinsicInst *,SmallVectorImpl<Value * > &,Type * &)2349 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, 2350 SmallVectorImpl<Value*> &/*Ops*/, 2351 Type *&/*AccessTy*/) const { 2352 return false; 2353 } 2354 2355 /// This represents an addressing mode of: 2356 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg 2357 /// If BaseGV is null, there is no BaseGV. 2358 /// If BaseOffs is zero, there is no base offset. 2359 /// If HasBaseReg is false, there is no base register. 2360 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with 2361 /// no scale. 2362 struct AddrMode { 2363 GlobalValue *BaseGV = nullptr; 2364 int64_t BaseOffs = 0; 2365 bool HasBaseReg = false; 2366 int64_t Scale = 0; 2367 AddrMode() = default; 2368 }; 2369 2370 /// Return true if the addressing mode represented by AM is legal for this 2371 /// target, for a load/store of the specified type. 2372 /// 2373 /// The type may be VoidTy, in which case only return true if the addressing 2374 /// mode is legal for a load/store of any legal type. TODO: Handle 2375 /// pre/postinc as well. 2376 /// 2377 /// If the address space cannot be determined, it will be -1. 2378 /// 2379 /// TODO: Remove default argument 2380 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, 2381 Type *Ty, unsigned AddrSpace, 2382 Instruction *I = nullptr) const; 2383 2384 /// Return the cost of the scaling factor used in the addressing mode 2385 /// represented by AM for this target, for a load/store of the specified type. 2386 /// 2387 /// If the AM is supported, the return value must be >= 0. 2388 /// If the AM is not supported, it returns a negative value. 2389 /// TODO: Handle pre/postinc as well. 2390 /// TODO: Remove default argument 2391 virtual InstructionCost getScalingFactorCost(const DataLayout &DL, 2392 const AddrMode &AM, Type *Ty, 2393 unsigned AS = 0) const { 2394 // Default: assume that any scaling factor used in a legal AM is free. 2395 if (isLegalAddressingMode(DL, AM, Ty, AS)) 2396 return 0; 2397 return -1; 2398 } 2399 2400 /// Return true if the specified immediate is legal icmp immediate, that is 2401 /// the target has icmp instructions which can compare a register against the 2402 /// immediate without having to materialize the immediate into a register. isLegalICmpImmediate(int64_t)2403 virtual bool isLegalICmpImmediate(int64_t) const { 2404 return true; 2405 } 2406 2407 /// Return true if the specified immediate is legal add immediate, that is the 2408 /// target has add instructions which can add a register with the immediate 2409 /// without having to materialize the immediate into a register. isLegalAddImmediate(int64_t)2410 virtual bool isLegalAddImmediate(int64_t) const { 2411 return true; 2412 } 2413 2414 /// Return true if the specified immediate is legal for the value input of a 2415 /// store instruction. isLegalStoreImmediate(int64_t Value)2416 virtual bool isLegalStoreImmediate(int64_t Value) const { 2417 // Default implementation assumes that at least 0 works since it is likely 2418 // that a zero register exists or a zero immediate is allowed. 2419 return Value == 0; 2420 } 2421 2422 /// Return true if it's significantly cheaper to shift a vector by a uniform 2423 /// scalar than by an amount which will vary across each lane. On x86 before 2424 /// AVX2 for example, there is a "psllw" instruction for the former case, but 2425 /// no simple instruction for a general "a << b" operation on vectors. 2426 /// This should also apply to lowering for vector funnel shifts (rotates). isVectorShiftByScalarCheap(Type * Ty)2427 virtual bool isVectorShiftByScalarCheap(Type *Ty) const { 2428 return false; 2429 } 2430 2431 /// Given a shuffle vector SVI representing a vector splat, return a new 2432 /// scalar type of size equal to SVI's scalar type if the new type is more 2433 /// profitable. Returns nullptr otherwise. For example under MVE float splats 2434 /// are converted to integer to prevent the need to move from SPR to GPR 2435 /// registers. shouldConvertSplatType(ShuffleVectorInst * SVI)2436 virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const { 2437 return nullptr; 2438 } 2439 2440 /// Given a set in interconnected phis of type 'From' that are loaded/stored 2441 /// or bitcast to type 'To', return true if the set should be converted to 2442 /// 'To'. shouldConvertPhiType(Type * From,Type * To)2443 virtual bool shouldConvertPhiType(Type *From, Type *To) const { 2444 return (From->isIntegerTy() || From->isFloatingPointTy()) && 2445 (To->isIntegerTy() || To->isFloatingPointTy()); 2446 } 2447 2448 /// Returns true if the opcode is a commutative binary operation. isCommutativeBinOp(unsigned Opcode)2449 virtual bool isCommutativeBinOp(unsigned Opcode) const { 2450 // FIXME: This should get its info from the td file. 2451 switch (Opcode) { 2452 case ISD::ADD: 2453 case ISD::SMIN: 2454 case ISD::SMAX: 2455 case ISD::UMIN: 2456 case ISD::UMAX: 2457 case ISD::MUL: 2458 case ISD::MULHU: 2459 case ISD::MULHS: 2460 case ISD::SMUL_LOHI: 2461 case ISD::UMUL_LOHI: 2462 case ISD::FADD: 2463 case ISD::FMUL: 2464 case ISD::AND: 2465 case ISD::OR: 2466 case ISD::XOR: 2467 case ISD::SADDO: 2468 case ISD::UADDO: 2469 case ISD::ADDC: 2470 case ISD::ADDE: 2471 case ISD::SADDSAT: 2472 case ISD::UADDSAT: 2473 case ISD::FMINNUM: 2474 case ISD::FMAXNUM: 2475 case ISD::FMINNUM_IEEE: 2476 case ISD::FMAXNUM_IEEE: 2477 case ISD::FMINIMUM: 2478 case ISD::FMAXIMUM: 2479 return true; 2480 default: return false; 2481 } 2482 } 2483 2484 /// Return true if the node is a math/logic binary operator. isBinOp(unsigned Opcode)2485 virtual bool isBinOp(unsigned Opcode) const { 2486 // A commutative binop must be a binop. 2487 if (isCommutativeBinOp(Opcode)) 2488 return true; 2489 // These are non-commutative binops. 2490 switch (Opcode) { 2491 case ISD::SUB: 2492 case ISD::SHL: 2493 case ISD::SRL: 2494 case ISD::SRA: 2495 case ISD::SDIV: 2496 case ISD::UDIV: 2497 case ISD::SREM: 2498 case ISD::UREM: 2499 case ISD::SSUBSAT: 2500 case ISD::USUBSAT: 2501 case ISD::FSUB: 2502 case ISD::FDIV: 2503 case ISD::FREM: 2504 return true; 2505 default: 2506 return false; 2507 } 2508 } 2509 2510 /// Return true if it's free to truncate a value of type FromTy to type 2511 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 2512 /// by referencing its sub-register AX. 2513 /// Targets must return false when FromTy <= ToTy. isTruncateFree(Type * FromTy,Type * ToTy)2514 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { 2515 return false; 2516 } 2517 2518 /// Return true if a truncation from FromTy to ToTy is permitted when deciding 2519 /// whether a call is in tail position. Typically this means that both results 2520 /// would be assigned to the same register or stack slot, but it could mean 2521 /// the target performs adequate checks of its own before proceeding with the 2522 /// tail call. Targets must return false when FromTy <= ToTy. allowTruncateForTailCall(Type * FromTy,Type * ToTy)2523 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { 2524 return false; 2525 } 2526 isTruncateFree(EVT FromVT,EVT ToVT)2527 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } isTruncateFree(LLT FromTy,LLT ToTy,const DataLayout & DL,LLVMContext & Ctx)2528 virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL, 2529 LLVMContext &Ctx) const { 2530 return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx), 2531 getApproximateEVTForLLT(ToTy, DL, Ctx)); 2532 } 2533 isProfitableToHoist(Instruction * I)2534 virtual bool isProfitableToHoist(Instruction *I) const { return true; } 2535 2536 /// Return true if the extension represented by \p I is free. 2537 /// Unlikely the is[Z|FP]ExtFree family which is based on types, 2538 /// this method can use the context provided by \p I to decide 2539 /// whether or not \p I is free. 2540 /// This method extends the behavior of the is[Z|FP]ExtFree family. 2541 /// In other words, if is[Z|FP]Free returns true, then this method 2542 /// returns true as well. The converse is not true. 2543 /// The target can perform the adequate checks by overriding isExtFreeImpl. 2544 /// \pre \p I must be a sign, zero, or fp extension. isExtFree(const Instruction * I)2545 bool isExtFree(const Instruction *I) const { 2546 switch (I->getOpcode()) { 2547 case Instruction::FPExt: 2548 if (isFPExtFree(EVT::getEVT(I->getType()), 2549 EVT::getEVT(I->getOperand(0)->getType()))) 2550 return true; 2551 break; 2552 case Instruction::ZExt: 2553 if (isZExtFree(I->getOperand(0)->getType(), I->getType())) 2554 return true; 2555 break; 2556 case Instruction::SExt: 2557 break; 2558 default: 2559 llvm_unreachable("Instruction is not an extension"); 2560 } 2561 return isExtFreeImpl(I); 2562 } 2563 2564 /// Return true if \p Load and \p Ext can form an ExtLoad. 2565 /// For example, in AArch64 2566 /// %L = load i8, i8* %ptr 2567 /// %E = zext i8 %L to i32 2568 /// can be lowered into one load instruction 2569 /// ldrb w0, [x0] isExtLoad(const LoadInst * Load,const Instruction * Ext,const DataLayout & DL)2570 bool isExtLoad(const LoadInst *Load, const Instruction *Ext, 2571 const DataLayout &DL) const { 2572 EVT VT = getValueType(DL, Ext->getType()); 2573 EVT LoadVT = getValueType(DL, Load->getType()); 2574 2575 // If the load has other users and the truncate is not free, the ext 2576 // probably isn't free. 2577 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && 2578 !isTruncateFree(Ext->getType(), Load->getType())) 2579 return false; 2580 2581 // Check whether the target supports casts folded into loads. 2582 unsigned LType; 2583 if (isa<ZExtInst>(Ext)) 2584 LType = ISD::ZEXTLOAD; 2585 else { 2586 assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); 2587 LType = ISD::SEXTLOAD; 2588 } 2589 2590 return isLoadExtLegal(LType, VT, LoadVT); 2591 } 2592 2593 /// Return true if any actual instruction that defines a value of type FromTy 2594 /// implicitly zero-extends the value to ToTy in the result register. 2595 /// 2596 /// The function should return true when it is likely that the truncate can 2597 /// be freely folded with an instruction defining a value of FromTy. If 2598 /// the defining instruction is unknown (because you're looking at a 2599 /// function argument, PHI, etc.) then the target may require an 2600 /// explicit truncate, which is not necessarily free, but this function 2601 /// does not deal with those cases. 2602 /// Targets must return false when FromTy >= ToTy. isZExtFree(Type * FromTy,Type * ToTy)2603 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { 2604 return false; 2605 } 2606 isZExtFree(EVT FromTy,EVT ToTy)2607 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } isZExtFree(LLT FromTy,LLT ToTy,const DataLayout & DL,LLVMContext & Ctx)2608 virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL, 2609 LLVMContext &Ctx) const { 2610 return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx), 2611 getApproximateEVTForLLT(ToTy, DL, Ctx)); 2612 } 2613 2614 /// Return true if sign-extension from FromTy to ToTy is cheaper than 2615 /// zero-extension. isSExtCheaperThanZExt(EVT FromTy,EVT ToTy)2616 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { 2617 return false; 2618 } 2619 2620 /// Return true if sinking I's operands to the same basic block as I is 2621 /// profitable, e.g. because the operands can be folded into a target 2622 /// instruction during instruction selection. After calling the function 2623 /// \p Ops contains the Uses to sink ordered by dominance (dominating users 2624 /// come first). shouldSinkOperands(Instruction * I,SmallVectorImpl<Use * > & Ops)2625 virtual bool shouldSinkOperands(Instruction *I, 2626 SmallVectorImpl<Use *> &Ops) const { 2627 return false; 2628 } 2629 2630 /// Return true if the target supplies and combines to a paired load 2631 /// two loaded values of type LoadedType next to each other in memory. 2632 /// RequiredAlignment gives the minimal alignment constraints that must be met 2633 /// to be able to select this paired load. 2634 /// 2635 /// This information is *not* used to generate actual paired loads, but it is 2636 /// used to generate a sequence of loads that is easier to combine into a 2637 /// paired load. 2638 /// For instance, something like this: 2639 /// a = load i64* addr 2640 /// b = trunc i64 a to i32 2641 /// c = lshr i64 a, 32 2642 /// d = trunc i64 c to i32 2643 /// will be optimized into: 2644 /// b = load i32* addr1 2645 /// d = load i32* addr2 2646 /// Where addr1 = addr2 +/- sizeof(i32). 2647 /// 2648 /// In other words, unless the target performs a post-isel load combining, 2649 /// this information should not be provided because it will generate more 2650 /// loads. hasPairedLoad(EVT,Align &)2651 virtual bool hasPairedLoad(EVT /*LoadedType*/, 2652 Align & /*RequiredAlignment*/) const { 2653 return false; 2654 } 2655 2656 /// Return true if the target has a vector blend instruction. hasVectorBlend()2657 virtual bool hasVectorBlend() const { return false; } 2658 2659 /// Get the maximum supported factor for interleaved memory accesses. 2660 /// Default to be the minimum interleave factor: 2. getMaxSupportedInterleaveFactor()2661 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } 2662 2663 /// Lower an interleaved load to target specific intrinsics. Return 2664 /// true on success. 2665 /// 2666 /// \p LI is the vector load instruction. 2667 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. 2668 /// \p Indices is the corresponding indices for each shufflevector. 2669 /// \p Factor is the interleave factor. lowerInterleavedLoad(LoadInst * LI,ArrayRef<ShuffleVectorInst * > Shuffles,ArrayRef<unsigned> Indices,unsigned Factor)2670 virtual bool lowerInterleavedLoad(LoadInst *LI, 2671 ArrayRef<ShuffleVectorInst *> Shuffles, 2672 ArrayRef<unsigned> Indices, 2673 unsigned Factor) const { 2674 return false; 2675 } 2676 2677 /// Lower an interleaved store to target specific intrinsics. Return 2678 /// true on success. 2679 /// 2680 /// \p SI is the vector store instruction. 2681 /// \p SVI is the shufflevector to RE-interleave the stored vector. 2682 /// \p Factor is the interleave factor. lowerInterleavedStore(StoreInst * SI,ShuffleVectorInst * SVI,unsigned Factor)2683 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, 2684 unsigned Factor) const { 2685 return false; 2686 } 2687 2688 /// Return true if zero-extending the specific node Val to type VT2 is free 2689 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or 2690 /// because it's folded such as X86 zero-extending loads). isZExtFree(SDValue Val,EVT VT2)2691 virtual bool isZExtFree(SDValue Val, EVT VT2) const { 2692 return isZExtFree(Val.getValueType(), VT2); 2693 } 2694 2695 /// Return true if an fpext operation is free (for instance, because 2696 /// single-precision floating-point numbers are implicitly extended to 2697 /// double-precision). isFPExtFree(EVT DestVT,EVT SrcVT)2698 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { 2699 assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && 2700 "invalid fpext types"); 2701 return false; 2702 } 2703 2704 /// Return true if an fpext operation input to an \p Opcode operation is free 2705 /// (for instance, because half-precision floating-point numbers are 2706 /// implicitly extended to float-precision) for an FMA instruction. isFPExtFoldable(const SelectionDAG & DAG,unsigned Opcode,EVT DestVT,EVT SrcVT)2707 virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, 2708 EVT DestVT, EVT SrcVT) const { 2709 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && 2710 "invalid fpext types"); 2711 return isFPExtFree(DestVT, SrcVT); 2712 } 2713 2714 /// Return true if folding a vector load into ExtVal (a sign, zero, or any 2715 /// extend node) is profitable. isVectorLoadExtDesirable(SDValue ExtVal)2716 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } 2717 2718 /// Return true if an fneg operation is free to the point where it is never 2719 /// worthwhile to replace it with a bitwise operation. isFNegFree(EVT VT)2720 virtual bool isFNegFree(EVT VT) const { 2721 assert(VT.isFloatingPoint()); 2722 return false; 2723 } 2724 2725 /// Return true if an fabs operation is free to the point where it is never 2726 /// worthwhile to replace it with a bitwise operation. isFAbsFree(EVT VT)2727 virtual bool isFAbsFree(EVT VT) const { 2728 assert(VT.isFloatingPoint()); 2729 return false; 2730 } 2731 2732 /// Return true if an FMA operation is faster than a pair of fmul and fadd 2733 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method 2734 /// returns true, otherwise fmuladd is expanded to fmul + fadd. 2735 /// 2736 /// NOTE: This may be called before legalization on types for which FMAs are 2737 /// not legal, but should return true if those types will eventually legalize 2738 /// to types that support FMAs. After legalization, it will only be called on 2739 /// types that support FMAs (via Legal or Custom actions) isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT)2740 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 2741 EVT) const { 2742 return false; 2743 } 2744 2745 /// IR version isFMAFasterThanFMulAndFAdd(const Function & F,Type *)2746 virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const { 2747 return false; 2748 } 2749 2750 /// Returns true if be combined with to form an ISD::FMAD. \p N may be an 2751 /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an 2752 /// fadd/fsub. isFMADLegal(const SelectionDAG & DAG,const SDNode * N)2753 virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const { 2754 assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB || 2755 N->getOpcode() == ISD::FMUL) && 2756 "unexpected node in FMAD forming combine"); 2757 return isOperationLegal(ISD::FMAD, N->getValueType(0)); 2758 } 2759 2760 // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather 2761 // than FMUL and ADD is delegated to the machine combiner. generateFMAsInMachineCombiner(EVT VT,CodeGenOpt::Level OptLevel)2762 virtual bool generateFMAsInMachineCombiner(EVT VT, 2763 CodeGenOpt::Level OptLevel) const { 2764 return false; 2765 } 2766 2767 /// Return true if it's profitable to narrow operations of type VT1 to 2768 /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from 2769 /// i32 to i16. isNarrowingProfitable(EVT,EVT)2770 virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const { 2771 return false; 2772 } 2773 2774 /// Return true if it is beneficial to convert a load of a constant to 2775 /// just the constant itself. 2776 /// On some targets it might be more efficient to use a combination of 2777 /// arithmetic instructions to materialize the constant instead of loading it 2778 /// from a constant pool. shouldConvertConstantLoadToIntImm(const APInt & Imm,Type * Ty)2779 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 2780 Type *Ty) const { 2781 return false; 2782 } 2783 2784 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type 2785 /// from this source type with this index. This is needed because 2786 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of 2787 /// the first element, and only the target knows which lowering is cheap. isExtractSubvectorCheap(EVT ResVT,EVT SrcVT,unsigned Index)2788 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 2789 unsigned Index) const { 2790 return false; 2791 } 2792 2793 /// Try to convert an extract element of a vector binary operation into an 2794 /// extract element followed by a scalar operation. shouldScalarizeBinop(SDValue VecOp)2795 virtual bool shouldScalarizeBinop(SDValue VecOp) const { 2796 return false; 2797 } 2798 2799 /// Return true if extraction of a scalar element from the given vector type 2800 /// at the given index is cheap. For example, if scalar operations occur on 2801 /// the same register file as vector operations, then an extract element may 2802 /// be a sub-register rename rather than an actual instruction. isExtractVecEltCheap(EVT VT,unsigned Index)2803 virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const { 2804 return false; 2805 } 2806 2807 /// Try to convert math with an overflow comparison into the corresponding DAG 2808 /// node operation. Targets may want to override this independently of whether 2809 /// the operation is legal/custom for the given type because it may obscure 2810 /// matching of other patterns. shouldFormOverflowOp(unsigned Opcode,EVT VT,bool MathUsed)2811 virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, 2812 bool MathUsed) const { 2813 // TODO: The default logic is inherited from code in CodeGenPrepare. 2814 // The opcode should not make a difference by default? 2815 if (Opcode != ISD::UADDO) 2816 return false; 2817 2818 // Allow the transform as long as we have an integer type that is not 2819 // obviously illegal and unsupported and if the math result is used 2820 // besides the overflow check. On some targets (e.g. SPARC), it is 2821 // not profitable to form on overflow op if the math result has no 2822 // concrete users. 2823 if (VT.isVector()) 2824 return false; 2825 return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT)); 2826 } 2827 2828 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR 2829 // even if the vector itself has multiple uses. aggressivelyPreferBuildVectorSources(EVT VecVT)2830 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { 2831 return false; 2832 } 2833 2834 // Return true if CodeGenPrepare should consider splitting large offset of a 2835 // GEP to make the GEP fit into the addressing mode and can be sunk into the 2836 // same blocks of its users. shouldConsiderGEPOffsetSplit()2837 virtual bool shouldConsiderGEPOffsetSplit() const { return false; } 2838 2839 /// Return true if creating a shift of the type by the given 2840 /// amount is not profitable. shouldAvoidTransformToShift(EVT VT,unsigned Amount)2841 virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const { 2842 return false; 2843 } 2844 2845 /// Does this target require the clearing of high-order bits in a register 2846 /// passed to the fp16 to fp conversion library function. shouldKeepZExtForFP16Conv()2847 virtual bool shouldKeepZExtForFP16Conv() const { return false; } 2848 2849 //===--------------------------------------------------------------------===// 2850 // Runtime Library hooks 2851 // 2852 2853 /// Rename the default libcall routine name for the specified libcall. setLibcallName(RTLIB::Libcall Call,const char * Name)2854 void setLibcallName(RTLIB::Libcall Call, const char *Name) { 2855 LibcallRoutineNames[Call] = Name; 2856 } 2857 2858 /// Get the libcall routine name for the specified libcall. getLibcallName(RTLIB::Libcall Call)2859 const char *getLibcallName(RTLIB::Libcall Call) const { 2860 return LibcallRoutineNames[Call]; 2861 } 2862 2863 /// Override the default CondCode to be used to test the result of the 2864 /// comparison libcall against zero. setCmpLibcallCC(RTLIB::Libcall Call,ISD::CondCode CC)2865 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { 2866 CmpLibcallCCs[Call] = CC; 2867 } 2868 2869 /// Get the CondCode that's to be used to test the result of the comparison 2870 /// libcall against zero. getCmpLibcallCC(RTLIB::Libcall Call)2871 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { 2872 return CmpLibcallCCs[Call]; 2873 } 2874 2875 /// Set the CallingConv that should be used for the specified libcall. setLibcallCallingConv(RTLIB::Libcall Call,CallingConv::ID CC)2876 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { 2877 LibcallCallingConvs[Call] = CC; 2878 } 2879 2880 /// Get the CallingConv that should be used for the specified libcall. getLibcallCallingConv(RTLIB::Libcall Call)2881 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { 2882 return LibcallCallingConvs[Call]; 2883 } 2884 2885 /// Execute target specific actions to finalize target lowering. 2886 /// This is used to set extra flags in MachineFrameInformation and freezing 2887 /// the set of reserved registers. 2888 /// The default implementation just freezes the set of reserved registers. 2889 virtual void finalizeLowering(MachineFunction &MF) const; 2890 2891 //===----------------------------------------------------------------------===// 2892 // GlobalISel Hooks 2893 //===----------------------------------------------------------------------===// 2894 /// Check whether or not \p MI needs to be moved close to its uses. 2895 virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const; 2896 2897 2898 private: 2899 const TargetMachine &TM; 2900 2901 /// Tells the code generator that the target has multiple (allocatable) 2902 /// condition registers that can be used to store the results of comparisons 2903 /// for use by selects and conditional branches. With multiple condition 2904 /// registers, the code generator will not aggressively sink comparisons into 2905 /// the blocks of their users. 2906 bool HasMultipleConditionRegisters; 2907 2908 /// Tells the code generator that the target has BitExtract instructions. 2909 /// The code generator will aggressively sink "shift"s into the blocks of 2910 /// their users if the users will generate "and" instructions which can be 2911 /// combined with "shift" to BitExtract instructions. 2912 bool HasExtractBitsInsn; 2913 2914 /// Tells the code generator to bypass slow divide or remainder 2915 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code 2916 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer 2917 /// div/rem when the operands are positive and less than 256. 2918 DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; 2919 2920 /// Tells the code generator that it shouldn't generate extra flow control 2921 /// instructions and should attempt to combine flow control instructions via 2922 /// predication. 2923 bool JumpIsExpensive; 2924 2925 /// Information about the contents of the high-bits in boolean values held in 2926 /// a type wider than i1. See getBooleanContents. 2927 BooleanContent BooleanContents; 2928 2929 /// Information about the contents of the high-bits in boolean values held in 2930 /// a type wider than i1. See getBooleanContents. 2931 BooleanContent BooleanFloatContents; 2932 2933 /// Information about the contents of the high-bits in boolean vector values 2934 /// when the element type is wider than i1. See getBooleanContents. 2935 BooleanContent BooleanVectorContents; 2936 2937 /// The target scheduling preference: shortest possible total cycles or lowest 2938 /// register usage. 2939 Sched::Preference SchedPreferenceInfo; 2940 2941 /// The minimum alignment that any argument on the stack needs to have. 2942 Align MinStackArgumentAlignment; 2943 2944 /// The minimum function alignment (used when optimizing for size, and to 2945 /// prevent explicitly provided alignment from leading to incorrect code). 2946 Align MinFunctionAlignment; 2947 2948 /// The preferred function alignment (used when alignment unspecified and 2949 /// optimizing for speed). 2950 Align PrefFunctionAlignment; 2951 2952 /// The preferred loop alignment (in log2 bot in bytes). 2953 Align PrefLoopAlignment; 2954 2955 /// Size in bits of the maximum atomics size the backend supports. 2956 /// Accesses larger than this will be expanded by AtomicExpandPass. 2957 unsigned MaxAtomicSizeInBitsSupported; 2958 2959 /// Size in bits of the minimum cmpxchg or ll/sc operation the 2960 /// backend supports. 2961 unsigned MinCmpXchgSizeInBits; 2962 2963 /// This indicates if the target supports unaligned atomic operations. 2964 bool SupportsUnalignedAtomics; 2965 2966 /// If set to a physical register, this specifies the register that 2967 /// llvm.savestack/llvm.restorestack should save and restore. 2968 Register StackPointerRegisterToSaveRestore; 2969 2970 /// This indicates the default register class to use for each ValueType the 2971 /// target supports natively. 2972 const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE]; 2973 uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE]; 2974 MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE]; 2975 2976 /// This indicates the "representative" register class to use for each 2977 /// ValueType the target supports natively. This information is used by the 2978 /// scheduler to track register pressure. By default, the representative 2979 /// register class is the largest legal super-reg register class of the 2980 /// register class of the specified type. e.g. On x86, i8, i16, and i32's 2981 /// representative class would be GR32. 2982 const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE]; 2983 2984 /// This indicates the "cost" of the "representative" register class for each 2985 /// ValueType. The cost is used by the scheduler to approximate register 2986 /// pressure. 2987 uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE]; 2988 2989 /// For any value types we are promoting or expanding, this contains the value 2990 /// type that we are changing to. For Expanded types, this contains one step 2991 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required 2992 /// (e.g. i64 -> i16). For types natively supported by the system, this holds 2993 /// the same type (e.g. i32 -> i32). 2994 MVT TransformToType[MVT::VALUETYPE_SIZE]; 2995 2996 /// For each operation and each value type, keep a LegalizeAction that 2997 /// indicates how instruction selection should deal with the operation. Most 2998 /// operations are Legal (aka, supported natively by the target), but 2999 /// operations that are not should be described. Note that operations on 3000 /// non-legal value types are not described here. 3001 LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END]; 3002 3003 /// For each load extension type and each value type, keep a LegalizeAction 3004 /// that indicates how instruction selection should deal with a load of a 3005 /// specific value type and extension type. Uses 4-bits to store the action 3006 /// for each of the 4 load ext types. 3007 uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; 3008 3009 /// For each value type pair keep a LegalizeAction that indicates whether a 3010 /// truncating store of a specific value type and truncating type is legal. 3011 LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; 3012 3013 /// For each indexed mode and each value type, keep a quad of LegalizeAction 3014 /// that indicates how instruction selection should deal with the load / 3015 /// store / maskedload / maskedstore. 3016 /// 3017 /// The first dimension is the value_type for the reference. The second 3018 /// dimension represents the various modes for load store. 3019 uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE]; 3020 3021 /// For each condition code (ISD::CondCode) keep a LegalizeAction that 3022 /// indicates how instruction selection should deal with the condition code. 3023 /// 3024 /// Because each CC action takes up 4 bits, we need to have the array size be 3025 /// large enough to fit all of the value types. This can be done by rounding 3026 /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8. 3027 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8]; 3028 3029 ValueTypeActionImpl ValueTypeActions; 3030 3031 private: 3032 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; 3033 3034 /// Targets can specify ISD nodes that they would like PerformDAGCombine 3035 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this 3036 /// array. 3037 unsigned char 3038 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; 3039 3040 /// For operations that must be promoted to a specific type, this holds the 3041 /// destination type. This map should be sparse, so don't hold it as an 3042 /// array. 3043 /// 3044 /// Targets add entries to this map with AddPromotedToType(..), clients access 3045 /// this with getTypeToPromoteTo(..). 3046 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> 3047 PromoteToType; 3048 3049 /// Stores the name each libcall. 3050 const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; 3051 3052 /// The ISD::CondCode that should be used to test the result of each of the 3053 /// comparison libcall against zero. 3054 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; 3055 3056 /// Stores the CallingConv that should be used for each libcall. 3057 CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; 3058 3059 /// Set default libcall names and calling conventions. 3060 void InitLibcalls(const Triple &TT); 3061 3062 /// The bits of IndexedModeActions used to store the legalisation actions 3063 /// We store the data as | ML | MS | L | S | each taking 4 bits. 3064 enum IndexedModeActionsBits { 3065 IMAB_Store = 0, 3066 IMAB_Load = 4, 3067 IMAB_MaskedStore = 8, 3068 IMAB_MaskedLoad = 12 3069 }; 3070 setIndexedModeAction(unsigned IdxMode,MVT VT,unsigned Shift,LegalizeAction Action)3071 void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, 3072 LegalizeAction Action) { 3073 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && 3074 (unsigned)Action < 0xf && "Table isn't big enough!"); 3075 unsigned Ty = (unsigned)VT.SimpleTy; 3076 IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); 3077 IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; 3078 } 3079 getIndexedModeAction(unsigned IdxMode,MVT VT,unsigned Shift)3080 LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, 3081 unsigned Shift) const { 3082 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && 3083 "Table isn't big enough!"); 3084 unsigned Ty = (unsigned)VT.SimpleTy; 3085 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); 3086 } 3087 3088 protected: 3089 /// Return true if the extension represented by \p I is free. 3090 /// \pre \p I is a sign, zero, or fp extension and 3091 /// is[Z|FP]ExtFree of the related types is not true. isExtFreeImpl(const Instruction * I)3092 virtual bool isExtFreeImpl(const Instruction *I) const { return false; } 3093 3094 /// Depth that GatherAllAliases should should continue looking for chain 3095 /// dependencies when trying to find a more preferable chain. As an 3096 /// approximation, this should be more than the number of consecutive stores 3097 /// expected to be merged. 3098 unsigned GatherAllAliasesMaxDepth; 3099 3100 /// \brief Specify maximum number of store instructions per memset call. 3101 /// 3102 /// When lowering \@llvm.memset this field specifies the maximum number of 3103 /// store operations that may be substituted for the call to memset. Targets 3104 /// must set this value based on the cost threshold for that target. Targets 3105 /// should assume that the memset will be done using as many of the largest 3106 /// store operations first, followed by smaller ones, if necessary, per 3107 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine 3108 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte 3109 /// store. This only applies to setting a constant array of a constant size. 3110 unsigned MaxStoresPerMemset; 3111 /// Likewise for functions with the OptSize attribute. 3112 unsigned MaxStoresPerMemsetOptSize; 3113 3114 /// \brief Specify maximum number of store instructions per memcpy call. 3115 /// 3116 /// When lowering \@llvm.memcpy this field specifies the maximum number of 3117 /// store operations that may be substituted for a call to memcpy. Targets 3118 /// must set this value based on the cost threshold for that target. Targets 3119 /// should assume that the memcpy will be done using as many of the largest 3120 /// store operations first, followed by smaller ones, if necessary, per 3121 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine 3122 /// with 32-bit alignment would result in one 4-byte store, a one 2-byte store 3123 /// and one 1-byte store. This only applies to copying a constant array of 3124 /// constant size. 3125 unsigned MaxStoresPerMemcpy; 3126 /// Likewise for functions with the OptSize attribute. 3127 unsigned MaxStoresPerMemcpyOptSize; 3128 /// \brief Specify max number of store instructions to glue in inlined memcpy. 3129 /// 3130 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number 3131 /// of store instructions to keep together. This helps in pairing and 3132 // vectorization later on. 3133 unsigned MaxGluedStoresPerMemcpy = 0; 3134 3135 /// \brief Specify maximum number of load instructions per memcmp call. 3136 /// 3137 /// When lowering \@llvm.memcmp this field specifies the maximum number of 3138 /// pairs of load operations that may be substituted for a call to memcmp. 3139 /// Targets must set this value based on the cost threshold for that target. 3140 /// Targets should assume that the memcmp will be done using as many of the 3141 /// largest load operations first, followed by smaller ones, if necessary, per 3142 /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine 3143 /// with 32-bit alignment would result in one 4-byte load, a one 2-byte load 3144 /// and one 1-byte load. This only applies to copying a constant array of 3145 /// constant size. 3146 unsigned MaxLoadsPerMemcmp; 3147 /// Likewise for functions with the OptSize attribute. 3148 unsigned MaxLoadsPerMemcmpOptSize; 3149 3150 /// \brief Specify maximum number of store instructions per memmove call. 3151 /// 3152 /// When lowering \@llvm.memmove this field specifies the maximum number of 3153 /// store instructions that may be substituted for a call to memmove. Targets 3154 /// must set this value based on the cost threshold for that target. Targets 3155 /// should assume that the memmove will be done using as many of the largest 3156 /// store operations first, followed by smaller ones, if necessary, per 3157 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine 3158 /// with 8-bit alignment would result in nine 1-byte stores. This only 3159 /// applies to copying a constant array of constant size. 3160 unsigned MaxStoresPerMemmove; 3161 /// Likewise for functions with the OptSize attribute. 3162 unsigned MaxStoresPerMemmoveOptSize; 3163 3164 /// Tells the code generator that select is more expensive than a branch if 3165 /// the branch is usually predicted right. 3166 bool PredictableSelectIsExpensive; 3167 3168 /// \see enableExtLdPromotion. 3169 bool EnableExtLdPromotion; 3170 3171 /// Return true if the value types that can be represented by the specified 3172 /// register class are all legal. 3173 bool isLegalRC(const TargetRegisterInfo &TRI, 3174 const TargetRegisterClass &RC) const; 3175 3176 /// Replace/modify any TargetFrameIndex operands with a targte-dependent 3177 /// sequence of memory operands that is recognized by PrologEpilogInserter. 3178 MachineBasicBlock *emitPatchPoint(MachineInstr &MI, 3179 MachineBasicBlock *MBB) const; 3180 3181 bool IsStrictFPEnabled; 3182 }; 3183 3184 /// This class defines information used to lower LLVM code to legal SelectionDAG 3185 /// operators that the target instruction selector can accept natively. 3186 /// 3187 /// This class also defines callbacks that targets must implement to lower 3188 /// target-specific constructs to SelectionDAG operators. 3189 class TargetLowering : public TargetLoweringBase { 3190 public: 3191 struct DAGCombinerInfo; 3192 struct MakeLibCallOptions; 3193 3194 TargetLowering(const TargetLowering &) = delete; 3195 TargetLowering &operator=(const TargetLowering &) = delete; 3196 3197 explicit TargetLowering(const TargetMachine &TM); 3198 3199 bool isPositionIndependent() const; 3200 isSDNodeSourceOfDivergence(const SDNode * N,FunctionLoweringInfo * FLI,LegacyDivergenceAnalysis * DA)3201 virtual bool isSDNodeSourceOfDivergence(const SDNode *N, 3202 FunctionLoweringInfo *FLI, 3203 LegacyDivergenceAnalysis *DA) const { 3204 return false; 3205 } 3206 isSDNodeAlwaysUniform(const SDNode * N)3207 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { 3208 return false; 3209 } 3210 3211 /// Returns true by value, base pointer and offset pointer and addressing mode 3212 /// by reference if the node's address can be legally represented as 3213 /// pre-indexed load / store address. getPreIndexedAddressParts(SDNode *,SDValue &,SDValue &,ISD::MemIndexedMode &,SelectionDAG &)3214 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, 3215 SDValue &/*Offset*/, 3216 ISD::MemIndexedMode &/*AM*/, 3217 SelectionDAG &/*DAG*/) const { 3218 return false; 3219 } 3220 3221 /// Returns true by value, base pointer and offset pointer and addressing mode 3222 /// by reference if this node can be combined with a load / store to form a 3223 /// post-indexed load / store. getPostIndexedAddressParts(SDNode *,SDNode *,SDValue &,SDValue &,ISD::MemIndexedMode &,SelectionDAG &)3224 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, 3225 SDValue &/*Base*/, 3226 SDValue &/*Offset*/, 3227 ISD::MemIndexedMode &/*AM*/, 3228 SelectionDAG &/*DAG*/) const { 3229 return false; 3230 } 3231 3232 /// Returns true if the specified base+offset is a legal indexed addressing 3233 /// mode for this target. \p MI is the load or store instruction that is being 3234 /// considered for transformation. isIndexingLegal(MachineInstr & MI,Register Base,Register Offset,bool IsPre,MachineRegisterInfo & MRI)3235 virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, 3236 bool IsPre, MachineRegisterInfo &MRI) const { 3237 return false; 3238 } 3239 3240 /// Return the entry encoding for a jump table in the current function. The 3241 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. 3242 virtual unsigned getJumpTableEncoding() const; 3243 3244 virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *,const MachineBasicBlock *,unsigned,MCContext &)3245 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, 3246 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, 3247 MCContext &/*Ctx*/) const { 3248 llvm_unreachable("Need to implement this hook if target has custom JTIs"); 3249 } 3250 3251 /// Returns relocation base for the given PIC jumptable. 3252 virtual SDValue getPICJumpTableRelocBase(SDValue Table, 3253 SelectionDAG &DAG) const; 3254 3255 /// This returns the relocation base for the given PIC jumptable, the same as 3256 /// getPICJumpTableRelocBase, but as an MCExpr. 3257 virtual const MCExpr * 3258 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 3259 unsigned JTI, MCContext &Ctx) const; 3260 3261 /// Return true if folding a constant offset with the given GlobalAddress is 3262 /// legal. It is frequently not legal in PIC relocation models. 3263 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; 3264 3265 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, 3266 SDValue &Chain) const; 3267 3268 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, 3269 SDValue &NewRHS, ISD::CondCode &CCCode, 3270 const SDLoc &DL, const SDValue OldLHS, 3271 const SDValue OldRHS) const; 3272 3273 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, 3274 SDValue &NewRHS, ISD::CondCode &CCCode, 3275 const SDLoc &DL, const SDValue OldLHS, 3276 const SDValue OldRHS, SDValue &Chain, 3277 bool IsSignaling = false) const; 3278 3279 /// Returns a pair of (return value, chain). 3280 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 3281 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, 3282 EVT RetVT, ArrayRef<SDValue> Ops, 3283 MakeLibCallOptions CallOptions, 3284 const SDLoc &dl, 3285 SDValue Chain = SDValue()) const; 3286 3287 /// Check whether parameters to a call that are passed in callee saved 3288 /// registers are the same as from the calling function. This needs to be 3289 /// checked for tail call eligibility. 3290 bool parametersInCSRMatch(const MachineRegisterInfo &MRI, 3291 const uint32_t *CallerPreservedMask, 3292 const SmallVectorImpl<CCValAssign> &ArgLocs, 3293 const SmallVectorImpl<SDValue> &OutVals) const; 3294 3295 //===--------------------------------------------------------------------===// 3296 // TargetLowering Optimization Methods 3297 // 3298 3299 /// A convenience struct that encapsulates a DAG, and two SDValues for 3300 /// returning information from TargetLowering to its clients that want to 3301 /// combine. 3302 struct TargetLoweringOpt { 3303 SelectionDAG &DAG; 3304 bool LegalTys; 3305 bool LegalOps; 3306 SDValue Old; 3307 SDValue New; 3308 TargetLoweringOptTargetLoweringOpt3309 explicit TargetLoweringOpt(SelectionDAG &InDAG, 3310 bool LT, bool LO) : 3311 DAG(InDAG), LegalTys(LT), LegalOps(LO) {} 3312 LegalTypesTargetLoweringOpt3313 bool LegalTypes() const { return LegalTys; } LegalOperationsTargetLoweringOpt3314 bool LegalOperations() const { return LegalOps; } 3315 CombineToTargetLoweringOpt3316 bool CombineTo(SDValue O, SDValue N) { 3317 Old = O; 3318 New = N; 3319 return true; 3320 } 3321 }; 3322 3323 /// Determines the optimal series of memory ops to replace the memset / memcpy. 3324 /// Return true if the number of memory ops is below the threshold (Limit). 3325 /// It returns the types of the sequence of memory ops to perform 3326 /// memset / memcpy by reference. 3327 bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, 3328 const MemOp &Op, unsigned DstAS, unsigned SrcAS, 3329 const AttributeList &FuncAttributes) const; 3330 3331 /// Check to see if the specified operand of the specified instruction is a 3332 /// constant integer. If so, check to see if there are any bits set in the 3333 /// constant that are not demanded. If so, shrink the constant and return 3334 /// true. 3335 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 3336 const APInt &DemandedElts, 3337 TargetLoweringOpt &TLO) const; 3338 3339 /// Helper wrapper around ShrinkDemandedConstant, demanding all elements. 3340 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 3341 TargetLoweringOpt &TLO) const; 3342 3343 // Target hook to do target-specific const optimization, which is called by 3344 // ShrinkDemandedConstant. This function should return true if the target 3345 // doesn't want ShrinkDemandedConstant to further optimize the constant. targetShrinkDemandedConstant(SDValue Op,const APInt & DemandedBits,const APInt & DemandedElts,TargetLoweringOpt & TLO)3346 virtual bool targetShrinkDemandedConstant(SDValue Op, 3347 const APInt &DemandedBits, 3348 const APInt &DemandedElts, 3349 TargetLoweringOpt &TLO) const { 3350 return false; 3351 } 3352 3353 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This 3354 /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be 3355 /// generalized for targets with other types of implicit widening casts. 3356 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, 3357 TargetLoweringOpt &TLO) const; 3358 3359 /// Look at Op. At this point, we know that only the DemandedBits bits of the 3360 /// result of Op are ever used downstream. If we can use this information to 3361 /// simplify Op, create a new simplified DAG node and return true, returning 3362 /// the original and new nodes in Old and New. Otherwise, analyze the 3363 /// expression and return a mask of KnownOne and KnownZero bits for the 3364 /// expression (used to simplify the caller). The KnownZero/One bits may only 3365 /// be accurate for those bits in the Demanded masks. 3366 /// \p AssumeSingleUse When this parameter is true, this function will 3367 /// attempt to simplify \p Op even if there are multiple uses. 3368 /// Callers are responsible for correctly updating the DAG based on the 3369 /// results of this function, because simply replacing replacing TLO.Old 3370 /// with TLO.New will be incorrect when this parameter is true and TLO.Old 3371 /// has multiple uses. 3372 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, 3373 const APInt &DemandedElts, KnownBits &Known, 3374 TargetLoweringOpt &TLO, unsigned Depth = 0, 3375 bool AssumeSingleUse = false) const; 3376 3377 /// Helper wrapper around SimplifyDemandedBits, demanding all elements. 3378 /// Adds Op back to the worklist upon success. 3379 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, 3380 KnownBits &Known, TargetLoweringOpt &TLO, 3381 unsigned Depth = 0, 3382 bool AssumeSingleUse = false) const; 3383 3384 /// Helper wrapper around SimplifyDemandedBits. 3385 /// Adds Op back to the worklist upon success. 3386 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, 3387 DAGCombinerInfo &DCI) const; 3388 3389 /// More limited version of SimplifyDemandedBits that can be used to "look 3390 /// through" ops that don't contribute to the DemandedBits/DemandedElts - 3391 /// bitwise ops etc. 3392 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, 3393 const APInt &DemandedElts, 3394 SelectionDAG &DAG, 3395 unsigned Depth) const; 3396 3397 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all 3398 /// elements. 3399 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, 3400 SelectionDAG &DAG, 3401 unsigned Depth = 0) const; 3402 3403 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all 3404 /// bits from only some vector elements. 3405 SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, 3406 const APInt &DemandedElts, 3407 SelectionDAG &DAG, 3408 unsigned Depth = 0) const; 3409 3410 /// Look at Vector Op. At this point, we know that only the DemandedElts 3411 /// elements of the result of Op are ever used downstream. If we can use 3412 /// this information to simplify Op, create a new simplified DAG node and 3413 /// return true, storing the original and new nodes in TLO. 3414 /// Otherwise, analyze the expression and return a mask of KnownUndef and 3415 /// KnownZero elements for the expression (used to simplify the caller). 3416 /// The KnownUndef/Zero elements may only be accurate for those bits 3417 /// in the DemandedMask. 3418 /// \p AssumeSingleUse When this parameter is true, this function will 3419 /// attempt to simplify \p Op even if there are multiple uses. 3420 /// Callers are responsible for correctly updating the DAG based on the 3421 /// results of this function, because simply replacing replacing TLO.Old 3422 /// with TLO.New will be incorrect when this parameter is true and TLO.Old 3423 /// has multiple uses. 3424 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, 3425 APInt &KnownUndef, APInt &KnownZero, 3426 TargetLoweringOpt &TLO, unsigned Depth = 0, 3427 bool AssumeSingleUse = false) const; 3428 3429 /// Helper wrapper around SimplifyDemandedVectorElts. 3430 /// Adds Op back to the worklist upon success. 3431 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, 3432 APInt &KnownUndef, APInt &KnownZero, 3433 DAGCombinerInfo &DCI) const; 3434 3435 /// Determine which of the bits specified in Mask are known to be either zero 3436 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts 3437 /// argument allows us to only collect the known bits that are shared by the 3438 /// requested vector elements. 3439 virtual void computeKnownBitsForTargetNode(const SDValue Op, 3440 KnownBits &Known, 3441 const APInt &DemandedElts, 3442 const SelectionDAG &DAG, 3443 unsigned Depth = 0) const; 3444 3445 /// Determine which of the bits specified in Mask are known to be either zero 3446 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts 3447 /// argument allows us to only collect the known bits that are shared by the 3448 /// requested vector elements. This is for GISel. 3449 virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, 3450 Register R, KnownBits &Known, 3451 const APInt &DemandedElts, 3452 const MachineRegisterInfo &MRI, 3453 unsigned Depth = 0) const; 3454 3455 /// Determine the known alignment for the pointer value \p R. This is can 3456 /// typically be inferred from the number of low known 0 bits. However, for a 3457 /// pointer with a non-integral address space, the alignment value may be 3458 /// independent from the known low bits. 3459 virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, 3460 Register R, 3461 const MachineRegisterInfo &MRI, 3462 unsigned Depth = 0) const; 3463 3464 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. 3465 /// Default implementation computes low bits based on alignment 3466 /// information. This should preserve known bits passed into it. 3467 virtual void computeKnownBitsForFrameIndex(int FIOp, 3468 KnownBits &Known, 3469 const MachineFunction &MF) const; 3470 3471 /// This method can be implemented by targets that want to expose additional 3472 /// information about sign bits to the DAG Combiner. The DemandedElts 3473 /// argument allows us to only collect the minimum sign bits that are shared 3474 /// by the requested vector elements. 3475 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 3476 const APInt &DemandedElts, 3477 const SelectionDAG &DAG, 3478 unsigned Depth = 0) const; 3479 3480 /// This method can be implemented by targets that want to expose additional 3481 /// information about sign bits to GlobalISel combiners. The DemandedElts 3482 /// argument allows us to only collect the minimum sign bits that are shared 3483 /// by the requested vector elements. 3484 virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, 3485 Register R, 3486 const APInt &DemandedElts, 3487 const MachineRegisterInfo &MRI, 3488 unsigned Depth = 0) const; 3489 3490 /// Attempt to simplify any target nodes based on the demanded vector 3491 /// elements, returning true on success. Otherwise, analyze the expression and 3492 /// return a mask of KnownUndef and KnownZero elements for the expression 3493 /// (used to simplify the caller). The KnownUndef/Zero elements may only be 3494 /// accurate for those bits in the DemandedMask. 3495 virtual bool SimplifyDemandedVectorEltsForTargetNode( 3496 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, 3497 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; 3498 3499 /// Attempt to simplify any target nodes based on the demanded bits/elts, 3500 /// returning true on success. Otherwise, analyze the 3501 /// expression and return a mask of KnownOne and KnownZero bits for the 3502 /// expression (used to simplify the caller). The KnownZero/One bits may only 3503 /// be accurate for those bits in the Demanded masks. 3504 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, 3505 const APInt &DemandedBits, 3506 const APInt &DemandedElts, 3507 KnownBits &Known, 3508 TargetLoweringOpt &TLO, 3509 unsigned Depth = 0) const; 3510 3511 /// More limited version of SimplifyDemandedBits that can be used to "look 3512 /// through" ops that don't contribute to the DemandedBits/DemandedElts - 3513 /// bitwise ops etc. 3514 virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode( 3515 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 3516 SelectionDAG &DAG, unsigned Depth) const; 3517 3518 /// Return true if this function can prove that \p Op is never poison 3519 /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts 3520 /// argument limits the check to the requested vector elements. 3521 virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( 3522 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 3523 bool PoisonOnly, unsigned Depth) const; 3524 3525 /// Tries to build a legal vector shuffle using the provided parameters 3526 /// or equivalent variations. The Mask argument maybe be modified as the 3527 /// function tries different variations. 3528 /// Returns an empty SDValue if the operation fails. 3529 SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, 3530 SDValue N1, MutableArrayRef<int> Mask, 3531 SelectionDAG &DAG) const; 3532 3533 /// This method returns the constant pool value that will be loaded by LD. 3534 /// NOTE: You must check for implicit extensions of the constant by LD. 3535 virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; 3536 3537 /// If \p SNaN is false, \returns true if \p Op is known to never be any 3538 /// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling 3539 /// NaN. 3540 virtual bool isKnownNeverNaNForTargetNode(SDValue Op, 3541 const SelectionDAG &DAG, 3542 bool SNaN = false, 3543 unsigned Depth = 0) const; 3544 struct DAGCombinerInfo { 3545 void *DC; // The DAG Combiner object. 3546 CombineLevel Level; 3547 bool CalledByLegalizer; 3548 3549 public: 3550 SelectionDAG &DAG; 3551 DAGCombinerInfoDAGCombinerInfo3552 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) 3553 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} 3554 isBeforeLegalizeDAGCombinerInfo3555 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } isBeforeLegalizeOpsDAGCombinerInfo3556 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } isAfterLegalizeDAGDAGCombinerInfo3557 bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } getDAGCombineLevelDAGCombinerInfo3558 CombineLevel getDAGCombineLevel() { return Level; } isCalledByLegalizerDAGCombinerInfo3559 bool isCalledByLegalizer() const { return CalledByLegalizer; } 3560 3561 void AddToWorklist(SDNode *N); 3562 SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); 3563 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); 3564 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); 3565 3566 bool recursivelyDeleteUnusedNodes(SDNode *N); 3567 3568 void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); 3569 }; 3570 3571 /// Return if the N is a constant or constant vector equal to the true value 3572 /// from getBooleanContents(). 3573 bool isConstTrueVal(const SDNode *N) const; 3574 3575 /// Return if the N is a constant or constant vector equal to the false value 3576 /// from getBooleanContents(). 3577 bool isConstFalseVal(const SDNode *N) const; 3578 3579 /// Return if \p N is a True value when extended to \p VT. 3580 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; 3581 3582 /// Try to simplify a setcc built with the specified operands and cc. If it is 3583 /// unable to simplify it, return a null SDValue. 3584 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 3585 bool foldBooleans, DAGCombinerInfo &DCI, 3586 const SDLoc &dl) const; 3587 3588 // For targets which wrap address, unwrap for analysis. unwrapAddress(SDValue N)3589 virtual SDValue unwrapAddress(SDValue N) const { return N; } 3590 3591 /// Returns true (and the GlobalValue and the offset) if the node is a 3592 /// GlobalAddress + offset. 3593 virtual bool 3594 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; 3595 3596 /// This method will be invoked for all target nodes and for any 3597 /// target-independent nodes that the target has registered with invoke it 3598 /// for. 3599 /// 3600 /// The semantics are as follows: 3601 /// Return Value: 3602 /// SDValue.Val == 0 - No change was made 3603 /// SDValue.Val == N - N was replaced, is dead, and is already handled. 3604 /// otherwise - N should be replaced by the returned Operand. 3605 /// 3606 /// In addition, methods provided by DAGCombinerInfo may be used to perform 3607 /// more complex transformations. 3608 /// 3609 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 3610 3611 /// Return true if it is profitable to move this shift by a constant amount 3612 /// though its operand, adjusting any immediate operands as necessary to 3613 /// preserve semantics. This transformation may not be desirable if it 3614 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield 3615 /// extraction in AArch64). By default, it returns true. 3616 /// 3617 /// @param N the shift node 3618 /// @param Level the current DAGCombine legalization level. isDesirableToCommuteWithShift(const SDNode * N,CombineLevel Level)3619 virtual bool isDesirableToCommuteWithShift(const SDNode *N, 3620 CombineLevel Level) const { 3621 return true; 3622 } 3623 3624 /// Return true if the target has native support for the specified value type 3625 /// and it is 'desirable' to use the type for the given node type. e.g. On x86 3626 /// i16 is legal, but undesirable since i16 instruction encodings are longer 3627 /// and some i16 instructions are slow. isTypeDesirableForOp(unsigned,EVT VT)3628 virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const { 3629 // By default, assume all legal types are desirable. 3630 return isTypeLegal(VT); 3631 } 3632 3633 /// Return true if it is profitable for dag combiner to transform a floating 3634 /// point op of specified opcode to a equivalent op of an integer 3635 /// type. e.g. f32 load -> i32 load can be profitable on ARM. isDesirableToTransformToIntegerOp(unsigned,EVT)3636 virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/, 3637 EVT /*VT*/) const { 3638 return false; 3639 } 3640 3641 /// This method query the target whether it is beneficial for dag combiner to 3642 /// promote the specified node. If true, it should return the desired 3643 /// promotion type by reference. IsDesirableToPromoteOp(SDValue,EVT &)3644 virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const { 3645 return false; 3646 } 3647 3648 /// Return true if the target supports swifterror attribute. It optimizes 3649 /// loads and stores to reading and writing a specific register. supportSwiftError()3650 virtual bool supportSwiftError() const { 3651 return false; 3652 } 3653 3654 /// Return true if the target supports that a subset of CSRs for the given 3655 /// machine function is handled explicitly via copies. supportSplitCSR(MachineFunction * MF)3656 virtual bool supportSplitCSR(MachineFunction *MF) const { 3657 return false; 3658 } 3659 3660 /// Perform necessary initialization to handle a subset of CSRs explicitly 3661 /// via copies. This function is called at the beginning of instruction 3662 /// selection. initializeSplitCSR(MachineBasicBlock * Entry)3663 virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { 3664 llvm_unreachable("Not Implemented"); 3665 } 3666 3667 /// Insert explicit copies in entry and exit blocks. We copy a subset of 3668 /// CSRs to virtual registers in the entry block, and copy them back to 3669 /// physical registers in the exit blocks. This function is called at the end 3670 /// of instruction selection. insertCopiesSplitCSR(MachineBasicBlock * Entry,const SmallVectorImpl<MachineBasicBlock * > & Exits)3671 virtual void insertCopiesSplitCSR( 3672 MachineBasicBlock *Entry, 3673 const SmallVectorImpl<MachineBasicBlock *> &Exits) const { 3674 llvm_unreachable("Not Implemented"); 3675 } 3676 3677 /// Return the newly negated expression if the cost is not expensive and 3678 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to 3679 /// do the negation. 3680 virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, 3681 bool LegalOps, bool OptForSize, 3682 NegatibleCost &Cost, 3683 unsigned Depth = 0) const; 3684 3685 /// This is the helper function to return the newly negated expression only 3686 /// when the cost is cheaper. 3687 SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, 3688 bool LegalOps, bool OptForSize, 3689 unsigned Depth = 0) const { 3690 NegatibleCost Cost = NegatibleCost::Expensive; 3691 SDValue Neg = 3692 getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); 3693 if (Neg && Cost == NegatibleCost::Cheaper) 3694 return Neg; 3695 // Remove the new created node to avoid the side effect to the DAG. 3696 if (Neg && Neg.getNode()->use_empty()) 3697 DAG.RemoveDeadNode(Neg.getNode()); 3698 return SDValue(); 3699 } 3700 3701 /// This is the helper function to return the newly negated expression if 3702 /// the cost is not expensive. 3703 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, 3704 bool OptForSize, unsigned Depth = 0) const { 3705 NegatibleCost Cost = NegatibleCost::Expensive; 3706 return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); 3707 } 3708 3709 //===--------------------------------------------------------------------===// 3710 // Lowering methods - These methods must be implemented by targets so that 3711 // the SelectionDAGBuilder code knows how to lower these. 3712 // 3713 3714 /// Target-specific splitting of values into parts that fit a register 3715 /// storing a legal type splitValueIntoRegisterParts(SelectionDAG & DAG,const SDLoc & DL,SDValue Val,SDValue * Parts,unsigned NumParts,MVT PartVT,Optional<CallingConv::ID> CC)3716 virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, 3717 SDValue Val, SDValue *Parts, 3718 unsigned NumParts, MVT PartVT, 3719 Optional<CallingConv::ID> CC) const { 3720 return false; 3721 } 3722 3723 /// Target-specific combining of register parts into its original value 3724 virtual SDValue joinRegisterPartsIntoValue(SelectionDAG & DAG,const SDLoc & DL,const SDValue * Parts,unsigned NumParts,MVT PartVT,EVT ValueVT,Optional<CallingConv::ID> CC)3725 joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, 3726 const SDValue *Parts, unsigned NumParts, 3727 MVT PartVT, EVT ValueVT, 3728 Optional<CallingConv::ID> CC) const { 3729 return SDValue(); 3730 } 3731 3732 /// This hook must be implemented to lower the incoming (formal) arguments, 3733 /// described by the Ins array, into the specified DAG. The implementation 3734 /// should fill in the InVals array with legal-type argument values, and 3735 /// return the resulting token chain value. LowerFormalArguments(SDValue,CallingConv::ID,bool,const SmallVectorImpl<ISD::InputArg> &,const SDLoc &,SelectionDAG &,SmallVectorImpl<SDValue> &)3736 virtual SDValue LowerFormalArguments( 3737 SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, 3738 const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, 3739 SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { 3740 llvm_unreachable("Not Implemented"); 3741 } 3742 3743 /// This structure contains all information that is necessary for lowering 3744 /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder 3745 /// needs to lower a call, and targets will see this struct in their LowerCall 3746 /// implementation. 3747 struct CallLoweringInfo { 3748 SDValue Chain; 3749 Type *RetTy = nullptr; 3750 bool RetSExt : 1; 3751 bool RetZExt : 1; 3752 bool IsVarArg : 1; 3753 bool IsInReg : 1; 3754 bool DoesNotReturn : 1; 3755 bool IsReturnValueUsed : 1; 3756 bool IsConvergent : 1; 3757 bool IsPatchPoint : 1; 3758 bool IsPreallocated : 1; 3759 bool NoMerge : 1; 3760 3761 // IsTailCall should be modified by implementations of 3762 // TargetLowering::LowerCall that perform tail call conversions. 3763 bool IsTailCall = false; 3764 3765 // Is Call lowering done post SelectionDAG type legalization. 3766 bool IsPostTypeLegalization = false; 3767 3768 unsigned NumFixedArgs = -1; 3769 CallingConv::ID CallConv = CallingConv::C; 3770 SDValue Callee; 3771 ArgListTy Args; 3772 SelectionDAG &DAG; 3773 SDLoc DL; 3774 const CallBase *CB = nullptr; 3775 SmallVector<ISD::OutputArg, 32> Outs; 3776 SmallVector<SDValue, 32> OutVals; 3777 SmallVector<ISD::InputArg, 32> Ins; 3778 SmallVector<SDValue, 4> InVals; 3779 CallLoweringInfoCallLoweringInfo3780 CallLoweringInfo(SelectionDAG &DAG) 3781 : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), 3782 DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), 3783 IsPatchPoint(false), IsPreallocated(false), NoMerge(false), 3784 DAG(DAG) {} 3785 setDebugLocCallLoweringInfo3786 CallLoweringInfo &setDebugLoc(const SDLoc &dl) { 3787 DL = dl; 3788 return *this; 3789 } 3790 setChainCallLoweringInfo3791 CallLoweringInfo &setChain(SDValue InChain) { 3792 Chain = InChain; 3793 return *this; 3794 } 3795 3796 // setCallee with target/module-specific attributes setLibCalleeCallLoweringInfo3797 CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, 3798 SDValue Target, ArgListTy &&ArgsList) { 3799 RetTy = ResultType; 3800 Callee = Target; 3801 CallConv = CC; 3802 NumFixedArgs = ArgsList.size(); 3803 Args = std::move(ArgsList); 3804 3805 DAG.getTargetLoweringInfo().markLibCallAttributes( 3806 &(DAG.getMachineFunction()), CC, Args); 3807 return *this; 3808 } 3809 setCalleeCallLoweringInfo3810 CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, 3811 SDValue Target, ArgListTy &&ArgsList) { 3812 RetTy = ResultType; 3813 Callee = Target; 3814 CallConv = CC; 3815 NumFixedArgs = ArgsList.size(); 3816 Args = std::move(ArgsList); 3817 return *this; 3818 } 3819 setCalleeCallLoweringInfo3820 CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, 3821 SDValue Target, ArgListTy &&ArgsList, 3822 const CallBase &Call) { 3823 RetTy = ResultType; 3824 3825 IsInReg = Call.hasRetAttr(Attribute::InReg); 3826 DoesNotReturn = 3827 Call.doesNotReturn() || 3828 (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode())); 3829 IsVarArg = FTy->isVarArg(); 3830 IsReturnValueUsed = !Call.use_empty(); 3831 RetSExt = Call.hasRetAttr(Attribute::SExt); 3832 RetZExt = Call.hasRetAttr(Attribute::ZExt); 3833 NoMerge = Call.hasFnAttr(Attribute::NoMerge); 3834 3835 Callee = Target; 3836 3837 CallConv = Call.getCallingConv(); 3838 NumFixedArgs = FTy->getNumParams(); 3839 Args = std::move(ArgsList); 3840 3841 CB = &Call; 3842 3843 return *this; 3844 } 3845 3846 CallLoweringInfo &setInRegister(bool Value = true) { 3847 IsInReg = Value; 3848 return *this; 3849 } 3850 3851 CallLoweringInfo &setNoReturn(bool Value = true) { 3852 DoesNotReturn = Value; 3853 return *this; 3854 } 3855 3856 CallLoweringInfo &setVarArg(bool Value = true) { 3857 IsVarArg = Value; 3858 return *this; 3859 } 3860 3861 CallLoweringInfo &setTailCall(bool Value = true) { 3862 IsTailCall = Value; 3863 return *this; 3864 } 3865 3866 CallLoweringInfo &setDiscardResult(bool Value = true) { 3867 IsReturnValueUsed = !Value; 3868 return *this; 3869 } 3870 3871 CallLoweringInfo &setConvergent(bool Value = true) { 3872 IsConvergent = Value; 3873 return *this; 3874 } 3875 3876 CallLoweringInfo &setSExtResult(bool Value = true) { 3877 RetSExt = Value; 3878 return *this; 3879 } 3880 3881 CallLoweringInfo &setZExtResult(bool Value = true) { 3882 RetZExt = Value; 3883 return *this; 3884 } 3885 3886 CallLoweringInfo &setIsPatchPoint(bool Value = true) { 3887 IsPatchPoint = Value; 3888 return *this; 3889 } 3890 3891 CallLoweringInfo &setIsPreallocated(bool Value = true) { 3892 IsPreallocated = Value; 3893 return *this; 3894 } 3895 3896 CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { 3897 IsPostTypeLegalization = Value; 3898 return *this; 3899 } 3900 getArgsCallLoweringInfo3901 ArgListTy &getArgs() { 3902 return Args; 3903 } 3904 }; 3905 3906 /// This structure is used to pass arguments to makeLibCall function. 3907 struct MakeLibCallOptions { 3908 // By passing type list before soften to makeLibCall, the target hook 3909 // shouldExtendTypeInLibCall can get the original type before soften. 3910 ArrayRef<EVT> OpsVTBeforeSoften; 3911 EVT RetVTBeforeSoften; 3912 bool IsSExt : 1; 3913 bool DoesNotReturn : 1; 3914 bool IsReturnValueUsed : 1; 3915 bool IsPostTypeLegalization : 1; 3916 bool IsSoften : 1; 3917 MakeLibCallOptionsMakeLibCallOptions3918 MakeLibCallOptions() 3919 : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true), 3920 IsPostTypeLegalization(false), IsSoften(false) {} 3921 3922 MakeLibCallOptions &setSExt(bool Value = true) { 3923 IsSExt = Value; 3924 return *this; 3925 } 3926 3927 MakeLibCallOptions &setNoReturn(bool Value = true) { 3928 DoesNotReturn = Value; 3929 return *this; 3930 } 3931 3932 MakeLibCallOptions &setDiscardResult(bool Value = true) { 3933 IsReturnValueUsed = !Value; 3934 return *this; 3935 } 3936 3937 MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { 3938 IsPostTypeLegalization = Value; 3939 return *this; 3940 } 3941 3942 MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT, 3943 bool Value = true) { 3944 OpsVTBeforeSoften = OpsVT; 3945 RetVTBeforeSoften = RetVT; 3946 IsSoften = Value; 3947 return *this; 3948 } 3949 }; 3950 3951 /// This function lowers an abstract call to a function into an actual call. 3952 /// This returns a pair of operands. The first element is the return value 3953 /// for the function (if RetTy is not VoidTy). The second element is the 3954 /// outgoing token chain. It calls LowerCall to do the actual lowering. 3955 std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; 3956 3957 /// This hook must be implemented to lower calls into the specified 3958 /// DAG. The outgoing arguments to the call are described by the Outs array, 3959 /// and the values to be returned by the call are described by the Ins 3960 /// array. The implementation should fill in the InVals array with legal-type 3961 /// return values from the call, and return the resulting token chain value. 3962 virtual SDValue LowerCall(CallLoweringInfo &,SmallVectorImpl<SDValue> &)3963 LowerCall(CallLoweringInfo &/*CLI*/, 3964 SmallVectorImpl<SDValue> &/*InVals*/) const { 3965 llvm_unreachable("Not Implemented"); 3966 } 3967 3968 /// Target-specific cleanup for formal ByVal parameters. HandleByVal(CCState *,unsigned &,Align)3969 virtual void HandleByVal(CCState *, unsigned &, Align) const {} 3970 3971 /// This hook should be implemented to check whether the return values 3972 /// described by the Outs array can fit into the return registers. If false 3973 /// is returned, an sret-demotion is performed. CanLowerReturn(CallingConv::ID,MachineFunction &,bool,const SmallVectorImpl<ISD::OutputArg> &,LLVMContext &)3974 virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, 3975 MachineFunction &/*MF*/, bool /*isVarArg*/, 3976 const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, 3977 LLVMContext &/*Context*/) const 3978 { 3979 // Return true by default to get preexisting behavior. 3980 return true; 3981 } 3982 3983 /// This hook must be implemented to lower outgoing return values, described 3984 /// by the Outs array, into the specified DAG. The implementation should 3985 /// return the resulting token chain value. LowerReturn(SDValue,CallingConv::ID,bool,const SmallVectorImpl<ISD::OutputArg> &,const SmallVectorImpl<SDValue> &,const SDLoc &,SelectionDAG &)3986 virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, 3987 bool /*isVarArg*/, 3988 const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, 3989 const SmallVectorImpl<SDValue> & /*OutVals*/, 3990 const SDLoc & /*dl*/, 3991 SelectionDAG & /*DAG*/) const { 3992 llvm_unreachable("Not Implemented"); 3993 } 3994 3995 /// Return true if result of the specified node is used by a return node 3996 /// only. It also compute and return the input chain for the tail call. 3997 /// 3998 /// This is used to determine whether it is possible to codegen a libcall as 3999 /// tail call at legalization time. isUsedByReturnOnly(SDNode *,SDValue &)4000 virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const { 4001 return false; 4002 } 4003 4004 /// Return true if the target may be able emit the call instruction as a tail 4005 /// call. This is used by optimization passes to determine if it's profitable 4006 /// to duplicate return instructions to enable tailcall optimization. mayBeEmittedAsTailCall(const CallInst *)4007 virtual bool mayBeEmittedAsTailCall(const CallInst *) const { 4008 return false; 4009 } 4010 4011 /// Return the builtin name for the __builtin___clear_cache intrinsic 4012 /// Default is to invoke the clear cache library call getClearCacheBuiltinName()4013 virtual const char * getClearCacheBuiltinName() const { 4014 return "__clear_cache"; 4015 } 4016 4017 /// Return the register ID of the name passed in. Used by named register 4018 /// global variables extension. There is no target-independent behaviour 4019 /// so the default action is to bail. getRegisterByName(const char * RegName,LLT Ty,const MachineFunction & MF)4020 virtual Register getRegisterByName(const char* RegName, LLT Ty, 4021 const MachineFunction &MF) const { 4022 report_fatal_error("Named registers not implemented for this target"); 4023 } 4024 4025 /// Return the type that should be used to zero or sign extend a 4026 /// zeroext/signext integer return value. FIXME: Some C calling conventions 4027 /// require the return type to be promoted, but this is not true all the time, 4028 /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling 4029 /// conventions. The frontend should handle this and include all of the 4030 /// necessary information. getTypeForExtReturn(LLVMContext & Context,EVT VT,ISD::NodeType)4031 virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, 4032 ISD::NodeType /*ExtendKind*/) const { 4033 EVT MinVT = getRegisterType(Context, MVT::i32); 4034 return VT.bitsLT(MinVT) ? MinVT : VT; 4035 } 4036 4037 /// For some targets, an LLVM struct type must be broken down into multiple 4038 /// simple types, but the calling convention specifies that the entire struct 4039 /// must be passed in a block of consecutive registers. 4040 virtual bool functionArgumentNeedsConsecutiveRegisters(Type * Ty,CallingConv::ID CallConv,bool isVarArg,const DataLayout & DL)4041 functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, 4042 bool isVarArg, 4043 const DataLayout &DL) const { 4044 return false; 4045 } 4046 4047 /// For most targets, an LLVM type must be broken down into multiple 4048 /// smaller types. Usually the halves are ordered according to the endianness 4049 /// but for some platform that would break. So this method will default to 4050 /// matching the endianness but can be overridden. 4051 virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout & DL)4052 shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const { 4053 return DL.isLittleEndian(); 4054 } 4055 4056 /// Returns a 0 terminated array of registers that can be safely used as 4057 /// scratch registers. getScratchRegisters(CallingConv::ID CC)4058 virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { 4059 return nullptr; 4060 } 4061 4062 /// This callback is used to prepare for a volatile or atomic load. 4063 /// It takes a chain node as input and returns the chain for the load itself. 4064 /// 4065 /// Having a callback like this is necessary for targets like SystemZ, 4066 /// which allows a CPU to reuse the result of a previous load indefinitely, 4067 /// even if a cache-coherent store is performed by another CPU. The default 4068 /// implementation does nothing. prepareVolatileOrAtomicLoad(SDValue Chain,const SDLoc & DL,SelectionDAG & DAG)4069 virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, 4070 SelectionDAG &DAG) const { 4071 return Chain; 4072 } 4073 4074 /// Should SelectionDAG lower an atomic store of the given kind as a normal 4075 /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to 4076 /// eventually migrate all targets to the using StoreSDNodes, but porting is 4077 /// being done target at a time. lowerAtomicStoreAsStoreSDNode(const StoreInst & SI)4078 virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { 4079 assert(SI.isAtomic() && "violated precondition"); 4080 return false; 4081 } 4082 4083 /// Should SelectionDAG lower an atomic load of the given kind as a normal 4084 /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to 4085 /// eventually migrate all targets to the using LoadSDNodes, but porting is 4086 /// being done target at a time. lowerAtomicLoadAsLoadSDNode(const LoadInst & LI)4087 virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { 4088 assert(LI.isAtomic() && "violated precondition"); 4089 return false; 4090 } 4091 4092 4093 /// This callback is invoked by the type legalizer to legalize nodes with an 4094 /// illegal operand type but legal result types. It replaces the 4095 /// LowerOperation callback in the type Legalizer. The reason we can not do 4096 /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to 4097 /// use this callback. 4098 /// 4099 /// TODO: Consider merging with ReplaceNodeResults. 4100 /// 4101 /// The target places new result values for the node in Results (their number 4102 /// and types must exactly match those of the original return values of 4103 /// the node), or leaves Results empty, which indicates that the node is not 4104 /// to be custom lowered after all. 4105 /// The default implementation calls LowerOperation. 4106 virtual void LowerOperationWrapper(SDNode *N, 4107 SmallVectorImpl<SDValue> &Results, 4108 SelectionDAG &DAG) const; 4109 4110 /// This callback is invoked for operations that are unsupported by the 4111 /// target, which are registered to use 'custom' lowering, and whose defined 4112 /// values are all legal. If the target has no operations that require custom 4113 /// lowering, it need not implement this. The default implementation of this 4114 /// aborts. 4115 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; 4116 4117 /// This callback is invoked when a node result type is illegal for the 4118 /// target, and the operation was registered to use 'custom' lowering for that 4119 /// result type. The target places new result values for the node in Results 4120 /// (their number and types must exactly match those of the original return 4121 /// values of the node), or leaves Results empty, which indicates that the 4122 /// node is not to be custom lowered after all. 4123 /// 4124 /// If the target has no operations that require custom lowering, it need not 4125 /// implement this. The default implementation aborts. ReplaceNodeResults(SDNode *,SmallVectorImpl<SDValue> &,SelectionDAG &)4126 virtual void ReplaceNodeResults(SDNode * /*N*/, 4127 SmallVectorImpl<SDValue> &/*Results*/, 4128 SelectionDAG &/*DAG*/) const { 4129 llvm_unreachable("ReplaceNodeResults not implemented for this target!"); 4130 } 4131 4132 /// This method returns the name of a target specific DAG node. 4133 virtual const char *getTargetNodeName(unsigned Opcode) const; 4134 4135 /// This method returns a target specific FastISel object, or null if the 4136 /// target does not support "fast" ISel. createFastISel(FunctionLoweringInfo &,const TargetLibraryInfo *)4137 virtual FastISel *createFastISel(FunctionLoweringInfo &, 4138 const TargetLibraryInfo *) const { 4139 return nullptr; 4140 } 4141 4142 bool verifyReturnAddressArgumentIsConstant(SDValue Op, 4143 SelectionDAG &DAG) const; 4144 4145 //===--------------------------------------------------------------------===// 4146 // Inline Asm Support hooks 4147 // 4148 4149 /// This hook allows the target to expand an inline asm call to be explicit 4150 /// llvm code if it wants to. This is useful for turning simple inline asms 4151 /// into LLVM intrinsics, which gives the compiler more information about the 4152 /// behavior of the code. ExpandInlineAsm(CallInst *)4153 virtual bool ExpandInlineAsm(CallInst *) const { 4154 return false; 4155 } 4156 4157 enum ConstraintType { 4158 C_Register, // Constraint represents specific register(s). 4159 C_RegisterClass, // Constraint represents any of register(s) in class. 4160 C_Memory, // Memory constraint. 4161 C_Immediate, // Requires an immediate. 4162 C_Other, // Something else. 4163 C_Unknown // Unsupported constraint. 4164 }; 4165 4166 enum ConstraintWeight { 4167 // Generic weights. 4168 CW_Invalid = -1, // No match. 4169 CW_Okay = 0, // Acceptable. 4170 CW_Good = 1, // Good weight. 4171 CW_Better = 2, // Better weight. 4172 CW_Best = 3, // Best weight. 4173 4174 // Well-known weights. 4175 CW_SpecificReg = CW_Okay, // Specific register operands. 4176 CW_Register = CW_Good, // Register operands. 4177 CW_Memory = CW_Better, // Memory operands. 4178 CW_Constant = CW_Best, // Constant operand. 4179 CW_Default = CW_Okay // Default or don't know type. 4180 }; 4181 4182 /// This contains information for each constraint that we are lowering. 4183 struct AsmOperandInfo : public InlineAsm::ConstraintInfo { 4184 /// This contains the actual string for the code, like "m". TargetLowering 4185 /// picks the 'best' code from ConstraintInfo::Codes that most closely 4186 /// matches the operand. 4187 std::string ConstraintCode; 4188 4189 /// Information about the constraint code, e.g. Register, RegisterClass, 4190 /// Memory, Other, Unknown. 4191 TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; 4192 4193 /// If this is the result output operand or a clobber, this is null, 4194 /// otherwise it is the incoming operand to the CallInst. This gets 4195 /// modified as the asm is processed. 4196 Value *CallOperandVal = nullptr; 4197 4198 /// The ValueType for the operand value. 4199 MVT ConstraintVT = MVT::Other; 4200 4201 /// Copy constructor for copying from a ConstraintInfo. AsmOperandInfoAsmOperandInfo4202 AsmOperandInfo(InlineAsm::ConstraintInfo Info) 4203 : InlineAsm::ConstraintInfo(std::move(Info)) {} 4204 4205 /// Return true of this is an input operand that is a matching constraint 4206 /// like "4". 4207 bool isMatchingInputConstraint() const; 4208 4209 /// If this is an input matching constraint, this method returns the output 4210 /// operand it matches. 4211 unsigned getMatchedOperand() const; 4212 }; 4213 4214 using AsmOperandInfoVector = std::vector<AsmOperandInfo>; 4215 4216 /// Split up the constraint string from the inline assembly value into the 4217 /// specific constraints and their prefixes, and also tie in the associated 4218 /// operand values. If this returns an empty vector, and if the constraint 4219 /// string itself isn't empty, there was an error parsing. 4220 virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, 4221 const TargetRegisterInfo *TRI, 4222 const CallBase &Call) const; 4223 4224 /// Examine constraint type and operand type and determine a weight value. 4225 /// The operand object must already have been set up with the operand type. 4226 virtual ConstraintWeight getMultipleConstraintMatchWeight( 4227 AsmOperandInfo &info, int maIndex) const; 4228 4229 /// Examine constraint string and operand type and determine a weight value. 4230 /// The operand object must already have been set up with the operand type. 4231 virtual ConstraintWeight getSingleConstraintMatchWeight( 4232 AsmOperandInfo &info, const char *constraint) const; 4233 4234 /// Determines the constraint code and constraint type to use for the specific 4235 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. 4236 /// If the actual operand being passed in is available, it can be passed in as 4237 /// Op, otherwise an empty SDValue can be passed. 4238 virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, 4239 SDValue Op, 4240 SelectionDAG *DAG = nullptr) const; 4241 4242 /// Given a constraint, return the type of constraint it is for this target. 4243 virtual ConstraintType getConstraintType(StringRef Constraint) const; 4244 4245 /// Given a physical register constraint (e.g. {edx}), return the register 4246 /// number and the register class for the register. 4247 /// 4248 /// Given a register class constraint, like 'r', if this corresponds directly 4249 /// to an LLVM register class, return a register of 0 and the register class 4250 /// pointer. 4251 /// 4252 /// This should only be used for C_Register constraints. On error, this 4253 /// returns a register number of 0 and a null register class pointer. 4254 virtual std::pair<unsigned, const TargetRegisterClass *> 4255 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 4256 StringRef Constraint, MVT VT) const; 4257 getInlineAsmMemConstraint(StringRef ConstraintCode)4258 virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const { 4259 if (ConstraintCode == "m") 4260 return InlineAsm::Constraint_m; 4261 if (ConstraintCode == "o") 4262 return InlineAsm::Constraint_o; 4263 if (ConstraintCode == "X") 4264 return InlineAsm::Constraint_X; 4265 return InlineAsm::Constraint_Unknown; 4266 } 4267 4268 /// Try to replace an X constraint, which matches anything, with another that 4269 /// has more specific requirements based on the type of the corresponding 4270 /// operand. This returns null if there is no replacement to make. 4271 virtual const char *LowerXConstraint(EVT ConstraintVT) const; 4272 4273 /// Lower the specified operand into the Ops vector. If it is invalid, don't 4274 /// add anything to Ops. 4275 virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, 4276 std::vector<SDValue> &Ops, 4277 SelectionDAG &DAG) const; 4278 4279 // Lower custom output constraints. If invalid, return SDValue(). 4280 virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, 4281 const SDLoc &DL, 4282 const AsmOperandInfo &OpInfo, 4283 SelectionDAG &DAG) const; 4284 4285 //===--------------------------------------------------------------------===// 4286 // Div utility functions 4287 // 4288 SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, 4289 SmallVectorImpl<SDNode *> &Created) const; 4290 SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, 4291 SmallVectorImpl<SDNode *> &Created) const; 4292 4293 /// Targets may override this function to provide custom SDIV lowering for 4294 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM 4295 /// assumes SDIV is expensive and replaces it with a series of other integer 4296 /// operations. 4297 virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, 4298 SelectionDAG &DAG, 4299 SmallVectorImpl<SDNode *> &Created) const; 4300 4301 /// Indicate whether this target prefers to combine FDIVs with the same 4302 /// divisor. If the transform should never be done, return zero. If the 4303 /// transform should be done, return the minimum number of divisor uses 4304 /// that must exist. combineRepeatedFPDivisors()4305 virtual unsigned combineRepeatedFPDivisors() const { 4306 return 0; 4307 } 4308 4309 /// Hooks for building estimates in place of slower divisions and square 4310 /// roots. 4311 4312 /// Return either a square root or its reciprocal estimate value for the input 4313 /// operand. 4314 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or 4315 /// 'Enabled' as set by a potential default override attribute. 4316 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson 4317 /// refinement iterations required to generate a sufficient (though not 4318 /// necessarily IEEE-754 compliant) estimate is returned in that parameter. 4319 /// The boolean UseOneConstNR output is used to select a Newton-Raphson 4320 /// algorithm implementation that uses either one or two constants. 4321 /// The boolean Reciprocal is used to select whether the estimate is for the 4322 /// square root of the input operand or the reciprocal of its square root. 4323 /// A target may choose to implement its own refinement within this function. 4324 /// If that's true, then return '0' as the number of RefinementSteps to avoid 4325 /// any further refinement of the estimate. 4326 /// An empty SDValue return means no estimate sequence can be created. getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps,bool & UseOneConstNR,bool Reciprocal)4327 virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, 4328 int Enabled, int &RefinementSteps, 4329 bool &UseOneConstNR, bool Reciprocal) const { 4330 return SDValue(); 4331 } 4332 4333 /// Return a reciprocal estimate value for the input operand. 4334 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or 4335 /// 'Enabled' as set by a potential default override attribute. 4336 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson 4337 /// refinement iterations required to generate a sufficient (though not 4338 /// necessarily IEEE-754 compliant) estimate is returned in that parameter. 4339 /// A target may choose to implement its own refinement within this function. 4340 /// If that's true, then return '0' as the number of RefinementSteps to avoid 4341 /// any further refinement of the estimate. 4342 /// An empty SDValue return means no estimate sequence can be created. getRecipEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps)4343 virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, 4344 int Enabled, int &RefinementSteps) const { 4345 return SDValue(); 4346 } 4347 4348 /// Return a target-dependent comparison result if the input operand is 4349 /// suitable for use with a square root estimate calculation. For example, the 4350 /// comparison may check if the operand is NAN, INF, zero, normal, etc. The 4351 /// result should be used as the condition operand for a select or branch. 4352 virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, 4353 const DenormalMode &Mode) const; 4354 4355 /// Return a target-dependent result if the input operand is not suitable for 4356 /// use with a square root estimate calculation. getSqrtResultForDenormInput(SDValue Operand,SelectionDAG & DAG)4357 virtual SDValue getSqrtResultForDenormInput(SDValue Operand, 4358 SelectionDAG &DAG) const { 4359 return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType()); 4360 } 4361 4362 //===--------------------------------------------------------------------===// 4363 // Legalization utility functions 4364 // 4365 4366 /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, 4367 /// respectively, each computing an n/2-bit part of the result. 4368 /// \param Result A vector that will be filled with the parts of the result 4369 /// in little-endian order. 4370 /// \param LL Low bits of the LHS of the MUL. You can use this parameter 4371 /// if you want to control how low bits are extracted from the LHS. 4372 /// \param LH High bits of the LHS of the MUL. See LL for meaning. 4373 /// \param RL Low bits of the RHS of the MUL. See LL for meaning 4374 /// \param RH High bits of the RHS of the MUL. See LL for meaning. 4375 /// \returns true if the node has been expanded, false if it has not 4376 bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, 4377 SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, 4378 SelectionDAG &DAG, MulExpansionKind Kind, 4379 SDValue LL = SDValue(), SDValue LH = SDValue(), 4380 SDValue RL = SDValue(), SDValue RH = SDValue()) const; 4381 4382 /// Expand a MUL into two nodes. One that computes the high bits of 4383 /// the result and one that computes the low bits. 4384 /// \param HiLoVT The value type to use for the Lo and Hi nodes. 4385 /// \param LL Low bits of the LHS of the MUL. You can use this parameter 4386 /// if you want to control how low bits are extracted from the LHS. 4387 /// \param LH High bits of the LHS of the MUL. See LL for meaning. 4388 /// \param RL Low bits of the RHS of the MUL. See LL for meaning 4389 /// \param RH High bits of the RHS of the MUL. See LL for meaning. 4390 /// \returns true if the node has been expanded. false if it has not 4391 bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, 4392 SelectionDAG &DAG, MulExpansionKind Kind, 4393 SDValue LL = SDValue(), SDValue LH = SDValue(), 4394 SDValue RL = SDValue(), SDValue RH = SDValue()) const; 4395 4396 /// Expand funnel shift. 4397 /// \param N Node to expand 4398 /// \param Result output after conversion 4399 /// \returns True, if the expansion was successful, false otherwise 4400 bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; 4401 4402 /// Expand rotations. 4403 /// \param N Node to expand 4404 /// \param AllowVectorOps expand vector rotate, this should only be performed 4405 /// if the legalization is happening outside of LegalizeVectorOps 4406 /// \param Result output after conversion 4407 /// \returns True, if the expansion was successful, false otherwise 4408 bool expandROT(SDNode *N, bool AllowVectorOps, SDValue &Result, 4409 SelectionDAG &DAG) const; 4410 4411 /// Expand shift-by-parts. 4412 /// \param N Node to expand 4413 /// \param Lo lower-output-part after conversion 4414 /// \param Hi upper-output-part after conversion 4415 void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, 4416 SelectionDAG &DAG) const; 4417 4418 /// Expand float(f32) to SINT(i64) conversion 4419 /// \param N Node to expand 4420 /// \param Result output after conversion 4421 /// \returns True, if the expansion was successful, false otherwise 4422 bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; 4423 4424 /// Expand float to UINT conversion 4425 /// \param N Node to expand 4426 /// \param Result output after conversion 4427 /// \param Chain output chain after conversion 4428 /// \returns True, if the expansion was successful, false otherwise 4429 bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, 4430 SelectionDAG &DAG) const; 4431 4432 /// Expand UINT(i64) to double(f64) conversion 4433 /// \param N Node to expand 4434 /// \param Result output after conversion 4435 /// \param Chain output chain after conversion 4436 /// \returns True, if the expansion was successful, false otherwise 4437 bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, 4438 SelectionDAG &DAG) const; 4439 4440 /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. 4441 SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; 4442 4443 /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. 4444 /// \param N Node to expand 4445 /// \returns The expansion result 4446 SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const; 4447 4448 /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, 4449 /// vector nodes can only succeed if all operations are legal/custom. 4450 /// \param N Node to expand 4451 /// \param Result output after conversion 4452 /// \returns True, if the expansion was successful, false otherwise 4453 bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; 4454 4455 /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, 4456 /// vector nodes can only succeed if all operations are legal/custom. 4457 /// \param N Node to expand 4458 /// \param Result output after conversion 4459 /// \returns True, if the expansion was successful, false otherwise 4460 bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; 4461 4462 /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, 4463 /// vector nodes can only succeed if all operations are legal/custom. 4464 /// \param N Node to expand 4465 /// \param Result output after conversion 4466 /// \returns True, if the expansion was successful, false otherwise 4467 bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; 4468 4469 /// Expand ABS nodes. Expands vector/scalar ABS nodes, 4470 /// vector nodes can only succeed if all operations are legal/custom. 4471 /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) 4472 /// \param N Node to expand 4473 /// \param Result output after conversion 4474 /// \param IsNegative indicate negated abs 4475 /// \returns True, if the expansion was successful, false otherwise 4476 bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG, 4477 bool IsNegative = false) const; 4478 4479 /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64 4480 /// scalar types. Returns SDValue() if expand fails. 4481 /// \param N Node to expand 4482 /// \returns The expansion result or SDValue() if it fails. 4483 SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const; 4484 4485 /// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes. 4486 /// Returns SDValue() if expand fails. 4487 /// \param N Node to expand 4488 /// \returns The expansion result or SDValue() if it fails. 4489 SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const; 4490 4491 /// Turn load of vector type into a load of the individual elements. 4492 /// \param LD load to expand 4493 /// \returns BUILD_VECTOR and TokenFactor nodes. 4494 std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD, 4495 SelectionDAG &DAG) const; 4496 4497 // Turn a store of a vector type into stores of the individual elements. 4498 /// \param ST Store with a vector value type 4499 /// \returns TokenFactor of the individual store chains. 4500 SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; 4501 4502 /// Expands an unaligned load to 2 half-size loads for an integer, and 4503 /// possibly more for vectors. 4504 std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, 4505 SelectionDAG &DAG) const; 4506 4507 /// Expands an unaligned store to 2 half-size stores for integer values, and 4508 /// possibly more for vectors. 4509 SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; 4510 4511 /// Increments memory address \p Addr according to the type of the value 4512 /// \p DataVT that should be stored. If the data is stored in compressed 4513 /// form, the memory address should be incremented according to the number of 4514 /// the stored elements. This number is equal to the number of '1's bits 4515 /// in the \p Mask. 4516 /// \p DataVT is a vector type. \p Mask is a vector value. 4517 /// \p DataVT and \p Mask have the same number of vector elements. 4518 SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, 4519 EVT DataVT, SelectionDAG &DAG, 4520 bool IsCompressedMemory) const; 4521 4522 /// Get a pointer to vector element \p Idx located in memory for a vector of 4523 /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of 4524 /// bounds the returned pointer is unspecified, but will be within the vector 4525 /// bounds. 4526 SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, 4527 SDValue Index) const; 4528 4529 /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located 4530 /// in memory for a vector of type \p VecVT starting at a base address of 4531 /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the 4532 /// returned pointer is unspecified, but the value returned will be such that 4533 /// the entire subvector would be within the vector bounds. 4534 SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, 4535 EVT SubVecVT, SDValue Index) const; 4536 4537 /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This 4538 /// method accepts integers as its arguments. 4539 SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const; 4540 4541 /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This 4542 /// method accepts integers as its arguments. 4543 SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; 4544 4545 /// Method for building the DAG expansion of ISD::[US]SHLSAT. This 4546 /// method accepts integers as its arguments. 4547 SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const; 4548 4549 /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This 4550 /// method accepts integers as its arguments. 4551 SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; 4552 4553 /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This 4554 /// method accepts integers as its arguments. 4555 /// Note: This method may fail if the division could not be performed 4556 /// within the type. Clients must retry with a wider type if this happens. 4557 SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, 4558 SDValue LHS, SDValue RHS, 4559 unsigned Scale, SelectionDAG &DAG) const; 4560 4561 /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion 4562 /// always suceeds and populates the Result and Overflow arguments. 4563 void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, 4564 SelectionDAG &DAG) const; 4565 4566 /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion 4567 /// always suceeds and populates the Result and Overflow arguments. 4568 void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, 4569 SelectionDAG &DAG) const; 4570 4571 /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether 4572 /// expansion was successful and populates the Result and Overflow arguments. 4573 bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, 4574 SelectionDAG &DAG) const; 4575 4576 /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified, 4577 /// only the first Count elements of the vector are used. 4578 SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; 4579 4580 /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation. 4581 SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const; 4582 4583 /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal. 4584 /// Returns true if the expansion was successful. 4585 bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const; 4586 4587 /// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This 4588 /// method accepts vectors as its arguments. 4589 SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const; 4590 4591 /// Legalize a SETCC with given LHS and RHS and condition code CC on the 4592 /// current target. 4593 /// 4594 /// If the SETCC has been legalized using AND / OR, then the legalized node 4595 /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert 4596 /// will be set to false. 4597 /// 4598 /// If the SETCC has been legalized by using getSetCCSwappedOperands(), 4599 /// then the values of LHS and RHS will be swapped, CC will be set to the 4600 /// new condition, and NeedInvert will be set to false. 4601 /// 4602 /// If the SETCC has been legalized using the inverse condcode, then LHS and 4603 /// RHS will be unchanged, CC will set to the inverted condcode, and 4604 /// NeedInvert will be set to true. The caller must invert the result of the 4605 /// SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to swap 4606 /// the effect of a true/false result. 4607 /// 4608 /// \returns true if the SetCC has been legalized, false if it hasn't. 4609 bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, 4610 SDValue &RHS, SDValue &CC, bool &NeedInvert, 4611 const SDLoc &dl, SDValue &Chain, 4612 bool IsSignaling = false) const; 4613 4614 //===--------------------------------------------------------------------===// 4615 // Instruction Emitting Hooks 4616 // 4617 4618 /// This method should be implemented by targets that mark instructions with 4619 /// the 'usesCustomInserter' flag. These instructions are special in various 4620 /// ways, which require special support to insert. The specified MachineInstr 4621 /// is created but not inserted into any basic blocks, and this method is 4622 /// called to expand it into a sequence of instructions, potentially also 4623 /// creating new basic blocks and control flow. 4624 /// As long as the returned basic block is different (i.e., we created a new 4625 /// one), the custom inserter is free to modify the rest of \p MBB. 4626 virtual MachineBasicBlock * 4627 EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; 4628 4629 /// This method should be implemented by targets that mark instructions with 4630 /// the 'hasPostISelHook' flag. These instructions must be adjusted after 4631 /// instruction selection by target hooks. e.g. To fill in optional defs for 4632 /// ARM 's' setting instructions. 4633 virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, 4634 SDNode *Node) const; 4635 4636 /// If this function returns true, SelectionDAGBuilder emits a 4637 /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. useLoadStackGuardNode()4638 virtual bool useLoadStackGuardNode() const { 4639 return false; 4640 } 4641 emitStackGuardXorFP(SelectionDAG & DAG,SDValue Val,const SDLoc & DL)4642 virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, 4643 const SDLoc &DL) const { 4644 llvm_unreachable("not implemented for this target"); 4645 } 4646 4647 /// Lower TLS global address SDNode for target independent emulated TLS model. 4648 virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, 4649 SelectionDAG &DAG) const; 4650 4651 /// Expands target specific indirect branch for the case of JumpTable 4652 /// expanasion. expandIndirectJTBranch(const SDLoc & dl,SDValue Value,SDValue Addr,SelectionDAG & DAG)4653 virtual SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, SDValue Addr, 4654 SelectionDAG &DAG) const { 4655 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr); 4656 } 4657 4658 // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) 4659 // If we're comparing for equality to zero and isCtlzFast is true, expose the 4660 // fact that this can be implemented as a ctlz/srl pair, so that the dag 4661 // combiner can fold the new nodes. 4662 SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; 4663 4664 /// Give targets the chance to reduce the number of distinct addresing modes. 4665 ISD::MemIndexType getCanonicalIndexType(ISD::MemIndexType IndexType, 4666 EVT MemVT, SDValue Offsets) const; 4667 4668 private: 4669 SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 4670 const SDLoc &DL, DAGCombinerInfo &DCI) const; 4671 SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 4672 const SDLoc &DL, DAGCombinerInfo &DCI) const; 4673 4674 SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, 4675 SDValue N1, ISD::CondCode Cond, 4676 DAGCombinerInfo &DCI, 4677 const SDLoc &DL) const; 4678 4679 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 4680 SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( 4681 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, 4682 DAGCombinerInfo &DCI, const SDLoc &DL) const; 4683 4684 SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, 4685 SDValue CompTargetNode, ISD::CondCode Cond, 4686 DAGCombinerInfo &DCI, const SDLoc &DL, 4687 SmallVectorImpl<SDNode *> &Created) const; 4688 SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, 4689 ISD::CondCode Cond, DAGCombinerInfo &DCI, 4690 const SDLoc &DL) const; 4691 4692 SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, 4693 SDValue CompTargetNode, ISD::CondCode Cond, 4694 DAGCombinerInfo &DCI, const SDLoc &DL, 4695 SmallVectorImpl<SDNode *> &Created) const; 4696 SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, 4697 ISD::CondCode Cond, DAGCombinerInfo &DCI, 4698 const SDLoc &DL) const; 4699 }; 4700 4701 /// Given an LLVM IR type and return type attributes, compute the return value 4702 /// EVTs and flags, and optionally also the offsets, if the return value is 4703 /// being lowered to memory. 4704 void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, 4705 SmallVectorImpl<ISD::OutputArg> &Outs, 4706 const TargetLowering &TLI, const DataLayout &DL); 4707 4708 } // end namespace llvm 4709 4710 #endif // LLVM_CODEGEN_TARGETLOWERING_H 4711