//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file describes how to lower LLVM code to machine code.  This has three
/// main components:
///
///  1. Which ValueTypes are natively supported by the target.
///  2. Which operations are supported for supported ValueTypes.
///  3. Cost thresholds for alternative implementations of certain operations.
///
/// In addition it has a few other components, like information about FP
/// immediates.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_TARGETLOWERING_H
#define LLVM_CODEGEN_TARGETLOWERING_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

namespace llvm {

class BranchProbability;
class CCState;
class CCValAssign;
class Constant;
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
class GISelKnownBits;
class IntrinsicInst;
struct KnownBits;
class LegacyDivergenceAnalysis;
class LLVMContext;
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MachineJumpTableInfo;
class MachineLoop;
class MachineRegisterInfo;
class MCContext;
class MCExpr;
class Module;
class ProfileSummaryInfo;
class TargetLibraryInfo;
class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;
class TargetTransformInfo;
class Value;

namespace Sched {

enum Preference {
  None,        // No preference
  Source,      // Follow source order.
  RegPressure, // Scheduling for lowest register pressure.
  Hybrid,      // Scheduling for both latency and register pressure.
  ILP,         // Scheduling for ILP in low register pressure mode.
  VLIW         // Scheduling for VLIW targets.
};

} // end namespace Sched
// MemOp models a memory operation, either memset or memcpy/memmove.
struct MemOp {
private:
  // Shared
  uint64_t Size;
  bool DstAlignCanChange; // true if destination alignment can satisfy any
                          // constraint.
  Align DstAlign;         // Specified alignment of the memory operation.

  bool AllowOverlap;
  // memset only
  bool IsMemset;   // If set, this memory operation is a memset.
  bool ZeroMemset; // If set, clears out memory with zeros.
  // memcpy only
  bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
                     // constant so it does not need to be loaded.
  Align SrcAlign;    // Inferred alignment of the source or default value if the
                     // memory operation does not need to load the value.
public:
  bool MustPreserveCheriCaps; // memcpy must preserve CHERI tags even if
                              // SrcAlign < CapSize (since it could be aligned
                              // at run time)

  static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                    Align SrcAlign, bool IsVolatile, bool MustPreserveCheriCaps,
                    bool MemcpyStrSrc = false) {
    MemOp Op;
    Op.Size = Size;
    Op.DstAlignCanChange = DstAlignCanChange;
    Op.DstAlign = DstAlign;
    Op.AllowOverlap = !IsVolatile;
    Op.IsMemset = false;
    Op.ZeroMemset = false;
    Op.MemcpyStrSrc = MemcpyStrSrc;
    Op.MustPreserveCheriCaps = MustPreserveCheriCaps;
    Op.SrcAlign = SrcAlign;
    return Op;
  }

  static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
                   bool IsZeroMemset, bool IsVolatile) {
    MemOp Op;
    Op.Size = Size;
    Op.DstAlignCanChange = DstAlignCanChange;
    Op.DstAlign = DstAlign;
    Op.AllowOverlap = !IsVolatile;
    Op.IsMemset = true;
    Op.ZeroMemset = IsZeroMemset;
    Op.MemcpyStrSrc = false;
    Op.MustPreserveCheriCaps = false;
    return Op;
  }

  uint64_t size() const { return Size; }
  Align getDstAlign() const {
    assert(!DstAlignCanChange);
    return DstAlign;
  }
  bool isFixedDstAlign() const { return !DstAlignCanChange; }
  bool allowOverlap() const { return AllowOverlap; }
  bool isMemset() const { return IsMemset; }
  bool isMemcpy() const { return !IsMemset; }
  bool isMemcpyWithFixedDstAlign() const {
    return isMemcpy() && !DstAlignCanChange;
  }
  bool isZeroMemset() const { return isMemset() && ZeroMemset; }
  bool isMemcpyStrSrc() const {
    assert(isMemcpy() && "Must be a memcpy");
    return MemcpyStrSrc;
  }
  Align getSrcAlign() const {
    assert(isMemcpy() && "Must be a memcpy");
    return SrcAlign;
  }
  bool isSrcAligned(Align AlignCheck) const {
    return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
  }
  bool isDstAligned(Align AlignCheck) const {
    return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
  }
  bool isAligned(Align AlignCheck) const {
    return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
  }
};
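// Example (illustrative sketch, not part of the original header): constructing
// a MemOp that describes a 16-byte, non-volatile memcpy whose destination
// alignment is fixed at 8 bytes. The variable name is hypothetical.
//
// \code
//   MemOp Op = MemOp::Copy(/*Size=*/16, /*DstAlignCanChange=*/false,
//                          /*DstAlign=*/Align(8), /*SrcAlign=*/Align(4),
//                          /*IsVolatile=*/false,
//                          /*MustPreserveCheriCaps=*/false);
//   assert(Op.isMemcpy() && Op.isFixedDstAlign());
//   assert(Op.isDstAligned(Align(8)) && !Op.isSrcAligned(Align(8)));
// \endcode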
/// This base class for TargetLowering contains the SelectionDAG-independent
/// parts that can be used from the rest of CodeGen.
class TargetLoweringBase {
public:
  /// This enum indicates whether operations are valid for a target, and if
  /// not, what action should be used to make them valid.
  enum LegalizeAction : uint8_t {
    Legal,   // The target natively supports this operation.
    Promote, // This operation should be executed in a larger type.
    Expand,  // Try to expand this to other ops, otherwise use a libcall.
    LibCall, // Don't try to expand this to other ops, always use a libcall.
    Custom   // Use the LowerOperation hook to implement custom lowering.
  };

  /// This enum indicates whether a type is legal for a target, and if not,
  /// what action should be used to make it valid.
  enum LegalizeTypeAction : uint8_t {
    TypeLegal,           // The target natively supports this type.
    TypePromoteInteger,  // Replace this integer with a larger one.
    TypeExpandInteger,   // Split this integer into two of half the size.
    TypeSoftenFloat,     // Convert this float to a same size integer type.
    TypeExpandFloat,     // Split this float into two of half the size.
    TypeScalarizeVector, // Replace this one-element vector with its element.
    TypeSplitVector,     // Split this vector into two of half the size.
    TypeWidenVector,     // This vector should be widened into a larger vector.
    TypePromoteFloat,    // Replace this float with a larger one.
    TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
    TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
                                 // While it is theoretically possible to
                                 // legalize operations on scalable types with a
                                 // loop that handles the vscale * #lanes of the
                                 // vector, this is non-trivial at SelectionDAG
                                 // level and these types are better to be
                                 // widened or promoted.
  };

  /// LegalizeKind holds the legalization kind that needs to happen to an EVT
  /// in order to type-legalize it.
  using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;

  /// Enum that describes how the target represents true/false values.
  enum BooleanContent {
    UndefinedBooleanContent,    // Only bit 0 counts, the rest can hold garbage.
    ZeroOrOneBooleanContent,    // All bits zero except for bit 0.
    ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
  };

  /// Enum that describes what type of support for selects the target has.
  enum SelectSupportKind {
    ScalarValSelect,     // The target supports scalar selects (ex: cmov).
    ScalarCondVectorVal, // The target supports selects with a scalar condition
                         // and vector values (ex: cmov).
    VectorMaskSelect     // The target supports vector selects with a vector
                         // mask (ex: x86 blends).
  };

  /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
  /// to, if at all. Exists because different targets have different levels of
  /// support for these atomic instructions, and also have different options
  /// w.r.t. what they should expand to.
  enum class AtomicExpansionKind {
    None,    // Don't expand the instruction.
    LLSC,    // Expand the instruction into load-linked/store-conditional; used
             // by ARM/AArch64.
    LLOnly,  // Expand the (load) instruction into just a load-linked, which has
             // greater atomic guarantees than a normal load.
    CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
    MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
  };

  /// Enum that specifies when a multiplication should be expanded.
  enum class MulExpansionKind {
    Always,            // Always expand the instruction.
    OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
                       // or custom.
  };
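  // Example (illustrative, not from the original header): a backend's
  // TargetLowering constructor typically pairs the LegalizeAction values above
  // with calls to the protected setOperationAction() hook, e.g. to expand
  // 64-bit division to a libcall and custom-lower f64 bitcasts:
  //
  // \code
  //   setOperationAction(ISD::SDIV, MVT::i64, LibCall);
  //   setOperationAction(ISD::FSIN, MVT::f32, Expand);
  //   setOperationAction(ISD::BITCAST, MVT::f64, Custom);
  // \endcode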
  /// Enum that specifies when a float negation is beneficial.
  enum class NegatibleCost {
    Cheaper = 0,  // Negated expression is cheaper.
    Neutral = 1,  // Negated expression has the same cost.
    Expensive = 2 // Negated expression is more expensive.
  };

  class ArgListEntry {
  public:
    Value *Val = nullptr;
    SDValue Node = SDValue();
    Type *Ty = nullptr;
    bool IsSExt : 1;
    bool IsZExt : 1;
    bool IsInReg : 1;
    bool IsSRet : 1;
    bool IsNest : 1;
    bool IsByVal : 1;
    bool IsInAlloca : 1;
    bool IsPreallocated : 1;
    bool IsReturned : 1;
    bool IsSwiftSelf : 1;
    bool IsSwiftError : 1;
    bool IsCFGuardTarget : 1;
    MaybeAlign Alignment = None;
    Type *ByValType = nullptr;
    Type *PreallocatedType = nullptr;

    ArgListEntry()
        : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
          IsNest(false), IsByVal(false), IsInAlloca(false),
          IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
          IsSwiftError(false), IsCFGuardTarget(false) {}

    void setAttributes(const CallBase *Call, unsigned ArgIdx);
  };
  using ArgListTy = std::vector<ArgListEntry>;

  virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                     ArgListTy &Args) const {}

  static ISD::NodeType getExtendForContent(BooleanContent Content) {
    switch (Content) {
    case UndefinedBooleanContent:
      // Extend by adding rubbish bits.
      return ISD::ANY_EXTEND;
    case ZeroOrOneBooleanContent:
      // Extend by adding zero bits.
      return ISD::ZERO_EXTEND;
    case ZeroOrNegativeOneBooleanContent:
      // Extend by copying the sign bit.
      return ISD::SIGN_EXTEND;
    }
    llvm_unreachable("Invalid content kind");
  }

  explicit TargetLoweringBase(const TargetMachine &TM);
  TargetLoweringBase(const TargetLoweringBase &) = delete;
  TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
  virtual ~TargetLoweringBase() = default;

  /// Return true if the target supports strict float operations.
  bool isStrictFPEnabled() const {
    return IsStrictFPEnabled;
  }

protected:
  /// Initialize all of the actions to default values.
  void initActions();

public:
  const TargetMachine &getTargetMachine() const { return TM; }

  virtual bool useSoftFloat() const { return false; }

  virtual uint32_t getExceptionPointerAS() const { return 0; }

  /// Return the pointer type for the given address space, defaults to
  /// the pointer type from the data layout.
  /// FIXME: The default needs to be removed once all the code is updated.
  virtual MVT getPointerTy(const DataLayout &DL,
// To ease porting of backends allow defaulting to AS0
#ifdef LLVM_TARGETLOWERINGINFO_DEFAULT_AS
                           uint32_t AS = LLVM_TARGETLOWERINGINFO_DEFAULT_AS) const {
#else
                           uint32_t AS) const {
#endif
    if (DL.isFatPointer(AS))
      return MVT::getFatPointerVT(DL.getPointerSizeInBits(AS));
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
  }
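  // Example (illustrative, not from the original header): on a CHERI target
  // where address space 200 holds capabilities, getPointerTy() yields a
  // fat-pointer MVT for that address space and a plain integer MVT elsewhere.
  // `TLI` and `DL` are assumed to be the current lowering object and the
  // module's DataLayout.
  //
  // \code
  //   MVT CapTy = TLI.getPointerTy(DL, /*AS=*/200); // e.g. a 128-bit capability
  //   MVT IntTy = TLI.getPointerTy(DL, /*AS=*/0);   // e.g. MVT::i64
  // \endcode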
  /// Return the in-memory pointer type for the given address space, defaults
  /// to the pointer type from the data layout.
  /// FIXME: The default needs to be removed once all the code is updated.
  MVT getPointerMemTy(const DataLayout &DL,
// To ease porting of backends allow defaulting to AS0
#ifdef LLVM_TARGETLOWERINGINFO_DEFAULT_AS
                      uint32_t AS = LLVM_TARGETLOWERINGINFO_DEFAULT_AS) const {
#else
                      uint32_t AS) const {
#endif
    if (DL.isFatPointer(AS))
      return MVT::getFatPointerVT(DL.getPointerSizeInBits(AS));
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
  }

  /// Return the integer type with the same size as the address range for
  /// the given address space.
  virtual MVT getPointerRangeTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerAddrSizeInBits(AS));
  }

  /// Return the type for frame index, which is determined by
  /// the alloca address space specified through the data layout.
  MVT getFrameIndexTy(const DataLayout &DL) const {
    return getPointerTy(DL, DL.getAllocaAddrSpace());
  }

  /// Return the type for code pointers, which is determined by the program
  /// address space specified through the data layout.
  MVT getProgramPointerTy(const DataLayout &DL) const {
    return getPointerTy(DL, DL.getProgramAddressSpace());
  }

  /// Return the type for operands of fence.
  /// TODO: Let fence operands be of i32 type and remove this.
  virtual MVT getFenceOperandTy(const DataLayout &DL) const {
    // FIXME: hardcoded AS0
    return getPointerTy(DL, 0);
  }

  /// EVT is not used in-tree, but is used by out-of-tree targets.
  /// Documentation for this function would be nice...
  virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;

  EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
                       bool LegalTypes = true) const;

  /// Returns the type to be used for the index operand of:
  /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
  /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
  virtual MVT getVectorIdxTy(const DataLayout &DL) const {
    // FIXME: hardcoded AS0
    return getPointerTy(DL, 0);
  }

  /// This callback is used to inspect load/store instructions and add
  /// target-specific MachineMemOperand flags to them. The default
  /// implementation does nothing.
  virtual MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const {
    return MachineMemOperand::MONone;
  }

  MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI,
                                                  const DataLayout &DL) const;
  MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
                                                   const DataLayout &DL) const;
  MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
                                                    const DataLayout &DL) const;

  virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
    return true;
  }

  /// Return true if it is profitable to convert a select of FP constants into
  /// a constant pool load whose address depends on the select condition. The
  /// parameter may be used to differentiate a select with FP compare from
  /// integer compare.
  virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
    return true;
  }
  /// Return true if multiple condition registers are available.
  bool hasMultipleConditionRegisters() const {
    return HasMultipleConditionRegisters;
  }

  /// Return true if the target has BitExtract instructions.
  bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }

  /// Return the preferred vector type legalization action.
  virtual TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const {
    // The default action for one element vectors is to scalarize.
    if (VT.getVectorElementCount() == 1)
      return TypeScalarizeVector;
    // The default action for an odd-width vector is to widen.
    if (!VT.isPow2VectorType())
      return TypeWidenVector;
    // The default action for other vectors is to promote.
    return TypePromoteInteger;
  }

  // Return true if the half type should be passed around as i16, but promoted
  // to float around arithmetic. The default behavior is to pass around as
  // float and convert around loads/stores/bitcasts and other places where
  // the size matters.
  virtual bool softPromoteHalfType() const { return false; }

  // There are two general methods for expanding a BUILD_VECTOR node:
  //  1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
  //     them together.
  //  2. Build the vector on the stack and then load it.
  // If this function returns true, then method (1) will be used, subject to
  // the constraint that all of the necessary shuffles are legal (as determined
  // by isShuffleMaskLegal). If this function returns false, then method (2) is
  // always used. The vector type, and the number of defined values, are
  // provided.
  virtual bool
  shouldExpandBuildVectorWithShuffles(EVT /* VT */,
                                      unsigned DefinedValues) const {
    return DefinedValues < 3;
  }

  /// Return true if integer divide is usually cheaper than a sequence of
  /// several shifts, adds, and multiplies for this target.
  /// The definition of "cheaper" may depend on whether we're optimizing
  /// for speed or for size.
  virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }

  /// Return true if the target can handle a standalone remainder operation.
  virtual bool hasStandaloneRem(EVT VT) const {
    return true;
  }

  /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
  virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
    // Default behavior is to replace SQRT(X) with X*RSQRT(X).
    return false;
  }

  /// Reciprocal estimate status values used by the functions below.
  enum ReciprocalEstimate : int {
    Unspecified = -1,
    Disabled = 0,
    Enabled = 1
  };

  /// Return a ReciprocalEstimate enum value for a square root of the given
  /// type based on the function's attributes. If the operation is not
  /// overridden by the function's attributes, "Unspecified" is returned and
  /// target defaults are expected to be used for instruction selection.
  int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
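  // Example (illustrative sketch): a backend that prefers splitting odd-width
  // vectors instead of widening them could override the hook above.
  // `MyTargetLowering` is a hypothetical subclass, not part of LLVM.
  //
  // \code
  //   TargetLoweringBase::LegalizeTypeAction
  //   MyTargetLowering::getPreferredVectorAction(MVT VT) const {
  //     if (VT.getVectorElementCount() == 1)
  //       return TypeScalarizeVector;
  //     return TypeSplitVector;
  //   }
  // \endcode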
  /// Return a ReciprocalEstimate enum value for a division of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a square root of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a division of the given type based
  /// on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Returns true if target has indicated at least one type should be
  /// bypassed.
  bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }

  /// Returns map of slow types for division or remainder with corresponding
  /// fast types.
  const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
    return BypassSlowDivWidths;
  }

  /// Return true if Flow Control is an expensive operation that should be
  /// avoided.
  bool isJumpExpensive() const { return JumpIsExpensive; }

  /// Return true if selects are only cheaper than branches if the branch is
  /// unlikely to be predicted right.
  bool isPredictableSelectExpensive() const {
    return PredictableSelectIsExpensive;
  }

  virtual bool fallBackToDAGISel(const Instruction &Inst) const {
    return false;
  }

  /// If a branch or a select condition is skewed in one direction by more than
  /// this factor, it is very likely to be predicted correctly.
  virtual BranchProbability getPredictableBranchThreshold() const;
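  // Example (illustrative, not from the original header): targets populate
  // BypassSlowDivWidths via the protected addBypassSlowDiv() hook; x86, for
  // instance, bypasses a slow 64-bit divide with a 32-bit one when the
  // operands fit:
  //
  // \code
  //   addBypassSlowDiv(/*SlowBitWidth=*/64, /*FastBitWidth=*/32);
  // \endcode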
  /// Return true if the following transform is beneficial:
  ///   fold (conv (load x)) -> (load (conv*)x)
  /// On architectures that don't natively support some vector loads
  /// efficiently, casting the load to a smaller vector of larger types and
  /// loading is more efficient. However, this can be undone by optimizations
  /// in the DAG combiner.
  virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                       const SelectionDAG &DAG,
                                       const MachineMemOperand &MMO) const {
    // Don't do if we could do an indexed load on the original type, but not on
    // the new one.
    if (!LoadVT.isSimple() || !BitcastVT.isSimple())
      return true;

    MVT LoadMVT = LoadVT.getSimpleVT();

    // Don't bother doing this if it's just going to be promoted again later,
    // as doing so might interfere with other combines.
    if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
        getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
      return false;

    bool Fast = false;
    return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
                              MMO, &Fast) &&
           Fast;
  }

  /// Return true if the following transform is beneficial:
  ///   (store (y (conv x)), y*) -> (store x, (x*))
  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
                                        const SelectionDAG &DAG,
                                        const MachineMemOperand &MMO) const {
    // Default to the same logic as loads.
    return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
  }

  /// Return true if it is expected to be cheaper to do a store of a non-zero
  /// vector constant with the given size and type for the address space than
  /// to store the individual scalar element constants.
  virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
                                            unsigned NumElem,
                                            unsigned AddrSpace) const {
    return false;
  }

  /// Allow store merging for the specified type after legalization in addition
  /// to before legalization. This may transform stores that do not exist
  /// earlier (for example, stores created from intrinsics).
  virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
    return true;
  }

  /// Returns true if it's reasonable to merge stores to MemVT size.
  virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
                                const SelectionDAG &DAG) const {
    return true;
  }

  /// Return true if it is cheap to speculate a call to intrinsic cttz.
  virtual bool isCheapToSpeculateCttz() const {
    return false;
  }

  /// Return true if it is cheap to speculate a call to intrinsic ctlz.
  virtual bool isCheapToSpeculateCtlz() const {
    return false;
  }

  /// Return true if ctlz instruction is fast.
  virtual bool isCtlzFast() const {
    return false;
  }

  /// Return true if instruction generated for equality comparison is folded
  /// with instruction generated for signed comparison.
  virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }

  /// Return true if it is safe to transform an integer-domain bitwise
  /// operation into the equivalent floating-point operation. This should be
  /// set to true if the target has IEEE-754-compliant fabs/fneg operations for
  /// the input type.
  virtual bool hasBitPreservingFPLogic(EVT VT) const {
    return false;
  }
  /// Return true if it is cheaper to split the store of a merged int val
  /// from a pair of smaller values into multiple stores.
  virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
    return false;
  }

  /// Return true if the target supports combining a
  /// chain like:
  /// \code
  ///   %andResult = and %val1, #mask
  ///   %icmpResult = icmp %andResult, 0
  /// \endcode
  /// into a single machine instruction of a form like:
  /// \code
  ///   cc = test %register, #mask
  /// \endcode
  virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
    return false;
  }

  /// Use bitwise logic to make pairs of compares more efficient. For example:
  ///   and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
  /// This should be true when it takes more than one instruction to lower
  /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
  /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
  virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
    return false;
  }

  /// Return the preferred operand type if the target has a quick way to
  /// compare integer values of the given size. Assume that any legal integer
  /// type can be compared efficiently. Targets may override this to allow
  /// illegal wide types to return a vector type if there is support to compare
  /// that type.
  virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
    MVT VT = MVT::getIntegerVT(NumBits);
    return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
  }

  /// Return true if the target should transform:
  ///   (X & Y) == Y ---> (~X & Y) == 0
  ///   (X & Y) != Y ---> (~X & Y) != 0
  ///
  /// This may be profitable if the target has a bitwise and-not operation that
  /// sets comparison flags. A target may want to limit the transformation
  /// based on the type of Y or if Y is a constant.
  ///
  /// Note that the transform will not occur if Y is known to be a power-of-2
  /// because a mask and compare of a single bit can be handled by inverting
  /// the predicate, for example:
  ///   (X & 8) == 8 ---> (X & 8) != 0
  virtual bool hasAndNotCompare(SDValue Y) const {
    return false;
  }

  /// Return true if the target has a bitwise and-not operation:
  ///   X = ~A & B
  /// This can be used to simplify select or other instructions.
  virtual bool hasAndNot(SDValue X) const {
    // If the target has the more complex version of this operation, assume
    // that it has this operation too.
    return hasAndNotCompare(X);
  }

  /// Return true if the target has a bit-test instruction:
  ///   (X & (1 << Y)) ==/!= 0
  /// This knowledge can be used to prevent breaking the pattern,
  /// or creating it if it could be recognized.
  virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
  /// There are two ways to clear extreme bits (either low or high):
  ///   Mask:   x & (-1 << y)  (the instcombine canonical form)
  ///   Shifts: x >> y << y
  /// Return true if the variant with 2 variable shifts is preferred.
  /// Return false if there is no preference.
  virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
    // By default, let's assume that no one prefers shifts.
    return false;
  }

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  /// This is usually true on most targets. But some targets, like Thumb1,
  /// have immediate shift instructions, but no immediate "and" instruction;
  /// this makes the fold unprofitable.
  virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                                 CombineLevel Level) const {
    return true;
  }

  /// Should we transform the IR-optimal check for whether given truncation
  /// down into KeptBits would be truncating or not:
  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
  /// Into its more traditional form:
  ///   ((%x << C) a>> C) dstcond %x
  /// Return true if we should transform.
  /// Return false if there is no preference.
  virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
                                                    unsigned KeptBits) const {
    // By default, let's assume that no one prefers shifts.
    return false;
  }

  /// Given the pattern
  ///   (X & (C l>>/<< Y)) ==/!= 0
  /// return true if it should be transformed into:
  ///   ((X <</l>> Y) & C) ==/!= 0
  /// WARNING: if 'X' is a constant, the fold may deadlock!
  /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
  /// here because it can end up being not linked in.
  virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const {
    if (hasBitTest(X, Y)) {
      // One interesting pattern that we'd want to form is 'bit test':
      //   ((1 << Y) & C) ==/!= 0
      // But we also need to be careful not to try to reverse that fold.

      // Is this '1 << Y' ?
      if (OldShiftOpcode == ISD::SHL && CC->isOne())
        return false; // Keep the 'bit test' pattern.

      // Will it be '1 << Y' after the transform ?
      if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
        return true; // Do form the 'bit test' pattern.
    }

    // If 'X' is a constant, and we transform, then we will immediately
    // try to undo the fold, thus causing endless combine loop.
    // So by default, let's assume everyone prefers the fold
    // iff 'X' is not a constant.
    return !XC;
  }

  /// These two forms are equivalent:
  ///   sub %y, (xor %x, -1)
  ///   add (add %x, 1), %y
  /// The variant with two add's is IR-canonical.
  /// Some targets may prefer one to the other.
  virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
    // By default, let's assume that everyone prefers the form with two add's.
    return true;
  }
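  // Worked example (illustrative, not from the original header) for the hoist
  // transform above: with C = 0x80 the pattern
  //   (X & (0x80 l>> Y)) == 0
  // becomes
  //   ((X << Y) & 0x80) == 0
  // which tests a fixed bit once the shift has been moved from the constant
  // onto X, and so can match a target's bit-test instruction.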
  /// Return true if the target wants to use the optimization that
  /// turns ext(promotableInst1(...(promotableInstN(load)))) into
  /// promotedInst1(...(promotedInstN(ext(load)))).
  bool enableExtLdPromotion() const { return EnableExtLdPromotion; }

  /// Return true if the target can combine store(extractelement VectorTy,
  /// Idx).
  /// \p Cost[out] gives the cost of that transformation when this is true.
  virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                         unsigned &Cost) const {
    return false;
  }

  /// Return true if inserting a scalar into a variable element of an undef
  /// vector is more efficiently handled by splatting the scalar instead.
  virtual bool shouldSplatInsEltVarIndex(EVT) const {
    return false;
  }

  /// Return true if target always benefits from combining into FMA for a
  /// given value type. This must typically return false on targets where FMA
  /// takes more cycles to execute than FADD.
  virtual bool enableAggressiveFMAFusion(EVT VT) const {
    return false;
  }

  /// Return the ValueType of the result of SETCC operations.
  virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                                 EVT VT) const;

  /// Return the ValueType for comparison libcalls. Comparison libcalls include
  /// floating point comparison calls, and Ordered/Unordered check calls on
  /// floating point numbers.
  virtual
  MVT::SimpleValueType getCmpLibcallReturnType() const;

  /// For targets without i1 registers, this gives the nature of the high-bits
  /// of boolean values held in types wider than i1.
  ///
  /// "Boolean values" are special true/false values produced by nodes like
  /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
  /// Not to be confused with general values promoted from i1. Some CPUs
  /// distinguish between vectors of boolean and scalars; the isVec parameter
  /// selects between the two kinds. For example on X86 a scalar boolean should
  /// be zero extended from i1, while the elements of a vector of booleans
  /// should be sign extended from i1.
  ///
  /// Some CPUs also treat floating point types the same way as they treat
  /// vectors instead of the way they treat scalars.
  BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
    if (isVec)
      return BooleanVectorContents;
    return isFloat ? BooleanFloatContents : BooleanContents;
  }

  BooleanContent getBooleanContents(EVT Type) const {
    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
  }

  /// Return target scheduling preference.
  Sched::Preference getSchedulingPreference() const {
    return SchedPreferenceInfo;
  }

  /// Some schedulers, e.g. hybrid, can switch to different scheduling
  /// heuristics for different nodes. This function returns the preference
  /// (or none) for the given node.
  virtual Sched::Preference getSchedulingPreference(SDNode *) const {
    return Sched::None;
  }
  /// Return the register class that should be used for the specified value
  /// type.
  virtual const TargetRegisterClass *getRegClassFor(MVT VT,
                                                    bool isDivergent = false) const {
    (void)isDivergent;
    const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
    assert(RC && "This value type is not natively supported!");
    return RC;
  }

  /// Allows target to decide about the register class of the
  /// specific value that is live outside the defining block.
  /// Returns true if the value needs uniform register class.
  virtual bool requiresUniformRegister(MachineFunction &MF,
                                       const Value *) const {
    return false;
  }

  /// Return the 'representative' register class for the specified value
  /// type.
  ///
  /// The 'representative' register class is the largest legal super-reg
  /// register class for the register class of the value type. For example, on
  /// i386 the representative register class for i8, i16, and i32 is GR32; on
  /// x86_64 it is GR64.
  virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
    const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
    return RC;
  }

  /// Return the cost of the 'representative' register class for the specified
  /// value type.
  virtual uint8_t getRepRegClassCostFor(MVT VT) const {
    return RepRegClassCostForVT[VT.SimpleTy];
  }

  /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
  /// instructions, and false if a library call is preferred (e.g for code-size
  /// reasons).
  virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
    return true;
  }

  /// Return true if the target has native support for the specified value
  /// type. This means that it has a register that directly holds it without
  /// promotions or expansions.
  bool isTypeLegal(EVT VT) const {
    assert(!VT.isSimple() ||
           (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
    return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
  }

  class ValueTypeActionImpl {
    /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
    /// that indicates how instruction selection should deal with the type.
    LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];

  public:
    ValueTypeActionImpl() {
      std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
                TypeLegal);
    }

    LegalizeTypeAction getTypeAction(MVT VT) const {
      return ValueTypeActions[VT.SimpleTy];
    }

    void setTypeAction(MVT VT, LegalizeTypeAction Action) {
      ValueTypeActions[VT.SimpleTy] = Action;
    }
  };

  const ValueTypeActionImpl &getValueTypeActions() const {
    return ValueTypeActions;
  }
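  // Example (illustrative sketch): a GPU-style backend can use the isDivergent
  // flag to hand out scalar vs. vector register classes. `MyTargetLowering`,
  // `SGPRClass`, and `VGPRClass` are hypothetical names.
  //
  // \code
  //   const TargetRegisterClass *
  //   MyTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
  //     if (isDivergent)
  //       return &VGPRClass; // per-lane values need vector registers
  //     return TargetLoweringBase::getRegClassFor(VT, false);
  //   }
  // \endcode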
  /// Return how we should legalize values of this type, either it is already
  /// legal (return 'Legal') or we need to promote it to a larger type (return
  /// 'Promote'), or we need to expand it into multiple registers of smaller
  /// integer type (return 'Expand'). 'Custom' is not an option.
  LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
    return getTypeConversion(Context, VT).first;
  }
  LegalizeTypeAction getTypeAction(MVT VT) const {
    return ValueTypeActions.getTypeAction(VT);
  }

  /// For types supported by the target, this is an identity function. For
  /// types that must be promoted to larger types, this returns the larger type
  /// to promote to. For integer types that are larger than the largest integer
  /// register, this contains one step in the expansion to get to the smaller
  /// register. For illegal floating point types, this returns the integer type
  /// to transform to.
  EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
    return getTypeConversion(Context, VT).second;
  }

  /// For types supported by the target, this is an identity function. For
  /// types that must be expanded (i.e. integer types that are larger than the
  /// largest integer register or illegal floating point types), this returns
  /// the largest legal type it will be expanded to.
  EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
    assert(!VT.isVector());
    while (true) {
      switch (getTypeAction(Context, VT)) {
      case TypeLegal:
        return VT;
      case TypeExpandInteger:
        VT = getTypeToTransformTo(Context, VT);
        break;
      default:
        llvm_unreachable("Type is not legal nor is it to be expanded!");
      }
    }
  }

  /// Vector types are broken down into some number of legal first class types.
  /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
  /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
  /// turns into 4 EVT::i32 values with both PPC and X86.
  ///
  /// This method returns the number of registers needed, and the VT for each
  /// register. It also returns the VT and quantity of the intermediate values
  /// before they are promoted/expanded.
  unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                  EVT &IntermediateVT,
                                  unsigned &NumIntermediates,
                                  MVT &RegisterVT) const;
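  // Example (illustrative, not from the original header): querying the
  // breakdown of v8f32 on a target whose widest legal vector is v4f32, per the
  // comment above. `TLI` and `Ctx` are assumed to be the current lowering
  // object and LLVMContext.
  //
  // \code
  //   EVT IntermediateVT;
  //   MVT RegisterVT;
  //   unsigned NumIntermediates;
  //   unsigned NumRegs = TLI.getVectorTypeBreakdown(
  //       Ctx, EVT::getVectorVT(Ctx, MVT::f32, 8), IntermediateVT,
  //       NumIntermediates, RegisterVT);
  //   // Expect NumRegs == 2 with IntermediateVT == RegisterVT == v4f32.
  // \endcode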
  /// Certain targets such as MIPS require that some types such as vectors are
  /// always broken down into scalars in some contexts. This occurs even if the
  /// vector type is legal.
  virtual unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const {
    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
                                  RegisterVT);
  }

  struct IntrinsicInfo {
    unsigned opc = 0; // target opcode
    EVT memVT;        // memory VT

    // value representing memory location
    PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;

    int offset = 0;    // offset off of ptrVal
    uint64_t size = 0; // the size of the memory location
                       // (taken from memVT if zero)
    MaybeAlign align = Align(1); // alignment

    MachineMemOperand::Flags flags = MachineMemOperand::MONone;
    IntrinsicInfo() = default;
  };

  /// Given an intrinsic, checks if on the target the intrinsic will need to
  /// map to a MemIntrinsicNode (touches memory). If this is the case, it
  /// returns true and stores the intrinsic information into the IntrinsicInfo
  /// that was passed to the function.
  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                                  MachineFunction &,
                                  unsigned /*Intrinsic*/) const {
    return false;
  }

  /// Returns true if the target can instruction select the specified FP
  /// immediate natively. If false, the legalizer will materialize the FP
  /// immediate as a load from a constant pool.
  virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
                            bool ForCodeSize = false) const {
    return false;
  }

  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
  /// be legal.
  virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
    return true;
  }

  /// Returns true if the operation can trap for the value type.
  ///
  /// VT must be a legal type. By default, we optimistically assume most
  /// operations don't trap except for integer divide and remainder.
  virtual bool canOpTrap(unsigned Op, EVT VT) const;

  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
                                      EVT /*VT*/) const {
    return false;
  }
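  // Example (illustrative sketch): a backend describing a hypothetical
  // memory-touching intrinsic `my_target_load_atomic`. The class, intrinsic,
  // and field values are made up for illustration.
  //
  // \code
  //   bool MyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
  //                                             const CallInst &I,
  //                                             MachineFunction &MF,
  //                                             unsigned Intrinsic) const {
  //     if (Intrinsic != Intrinsic::my_target_load_atomic)
  //       return false;
  //     Info.opc = ISD::INTRINSIC_W_CHAIN;
  //     Info.memVT = MVT::i32;
  //     Info.ptrVal = I.getArgOperand(0);
  //     Info.align = Align(4);
  //     Info.flags = MachineMemOperand::MOLoad;
  //     return true;
  //   }
  // \endcode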
  /// Return how this operation should be treated: either it is legal, needs to
  /// be promoted to a larger size, needs to be expanded to some other code
  /// sequence, or the target has a custom expander for it.
  LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
    if (VT.isExtended()) return Expand;
    // If a target-specific SDNode requires legalization, require the target
    // to provide custom legalization for it.
    if (Op >= array_lengthof(OpActions[0])) return Custom;
    return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
  }

  /// Custom method defined by each target to indicate if an operation which
  /// may require a scale is supported natively by the target.
  /// If not, the operation is illegal.
  virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
                                              unsigned Scale) const {
    return false;
  }

  /// Some fixed point operations may be natively supported by the target but
  /// only for specific scales. This method allows for checking
  /// if the width is supported by the target for a given operation that may
  /// depend on scale.
  LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
                                              unsigned Scale) const {
    auto Action = getOperationAction(Op, VT);
    if (Action != Legal)
      return Action;

    // This operation is supported in this type but may only work on specific
    // scales.
    bool Supported;
    switch (Op) {
    default:
      llvm_unreachable("Unexpected fixed point operation.");
    case ISD::SMULFIX:
    case ISD::SMULFIXSAT:
    case ISD::UMULFIX:
    case ISD::UMULFIXSAT:
    case ISD::SDIVFIX:
    case ISD::SDIVFIXSAT:
    case ISD::UDIVFIX:
    case ISD::UDIVFIXSAT:
      Supported = isSupportedFixedPointOperation(Op, VT, Scale);
      break;
    }

    return Supported ? Action : Expand;
  }

  // If Op is a strict floating-point operation, return the result
  // of getOperationAction for the equivalent non-strict operation.
  LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
    unsigned EqOpc;
    switch (Op) {
    default: llvm_unreachable("Unexpected FP pseudo-opcode");
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
    case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
    case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
#include "llvm/IR/ConstrainedOps.def"
    }

    return getOperationAction(EqOpc, VT);
  }

  /// Return true if the specified operation is legal on this target or can be
  /// made legal with custom lowering. This is used to help guide high-level
  /// lowering decisions.
  bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           (getOperationAction(Op, VT) == Legal ||
            getOperationAction(Op, VT) == Custom);
  }

  /// Return true if the specified operation is legal on this target or can be
  /// made legal using promotion. This is used to help guide high-level
  /// lowering decisions.
  bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           (getOperationAction(Op, VT) == Legal ||
            getOperationAction(Op, VT) == Promote);
  }
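  // Example (illustrative, not from the original header): DAG combines
  // typically guard newly created nodes with these predicates so a fold is
  // only performed when the result can actually be selected, e.g.:
  //
  // \code
  //   // Only turn (x * 2^k) into a shift if shifts are available for VT.
  //   if (TLI.isOperationLegalOrCustom(ISD::SHL, VT))
  //     return DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
  // \endcode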
  /// Return true if the specified operation is legal on this target or can be
  /// made legal with custom lowering or using promotion. This is used to help
  /// guide high-level lowering decisions.
  bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           (getOperationAction(Op, VT) == Legal ||
            getOperationAction(Op, VT) == Custom ||
            getOperationAction(Op, VT) == Promote);
  }

  /// Return true if the operation uses custom lowering, regardless of whether
  /// the type is legal or not.
  bool isOperationCustom(unsigned Op, EVT VT) const {
    return getOperationAction(Op, VT) == Custom;
  }

  /// Return true if lowering to a jump table is allowed.
  virtual bool areJTsAllowed(const Function *Fn) const {
    if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
      return false;

    return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  /// Check whether the range [Low,High] fits in a machine word.
  bool rangeFitsInWord(const APInt &Low, const APInt &High,
                       const DataLayout &DL) const {
    // FIXME: Using the pointer type doesn't seem ideal.
    // XXXAR: AS0 hardcoded
    uint64_t BW = DL.getIndexSizeInBits(0u);
    uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
    return Range <= BW;
  }

  /// Return true if lowering to a jump table is suitable for a set of case
  /// clusters which may contain \p NumCases cases and a range of \p Range
  /// values.
  virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
                                      uint64_t Range, ProfileSummaryInfo *PSI,
                                      BlockFrequencyInfo *BFI) const;

  /// Return true if lowering to a bit test is suitable for a set of case
  /// clusters which contains \p NumDests unique destinations, \p Low and
  /// \p High as its lowest and highest case values, and expects \p NumCmps
  /// case value comparisons. Check if the number of destinations, comparison
  /// metric, and range are all suitable.
  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
                             const APInt &Low, const APInt &High,
                             const DataLayout &DL) const {
    // FIXME: I don't think NumCmps is the correct metric: a single case and a
    // range of cases both require only one branch to lower. Just looking at
    // the number of clusters and destinations should be enough to decide
    // whether to build bit tests.

    // To lower a range with bit tests, the range must fit the bitwidth of a
    // machine word.
    if (!rangeFitsInWord(Low, High, DL))
      return false;

    // Decide whether it's profitable to lower this range with bit tests. Each
    // destination requires a bit test and branch, and there is an overall
    // range check branch. For a small number of clusters, separate comparisons
    // might be cheaper, and for many destinations, splitting the range might
    // be better.
    return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
           (NumDests == 3 && NumCmps >= 6);
  }
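  // Worked example (illustrative): a switch with 3 distinct destinations, 6
  // case-value comparisons, and case values spanning [0, 31] fits in a 64-bit
  // machine word, so isSuitableForBitTests() returns true via the
  // (NumDests == 3 && NumCmps >= 6) clause; widening the case-value span past
  // the word size would make rangeFitsInWord() fail and disable the lowering.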
  /// Return true if the specified operation is illegal on this target or
  /// unlikely to be made legal with custom lowering. This is used to help
  /// guide high-level lowering decisions.
  bool isOperationExpand(unsigned Op, EVT VT) const {
    return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
  }

  /// Return true if the specified operation is legal on this target.
  bool isOperationLegal(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           getOperationAction(Op, VT) == Legal;
  }

  /// Return how this load with extension should be treated: either it is
  /// legal, needs to be promoted to a larger size, needs to be expanded to
  /// some other code sequence, or the target has a custom expander for it.
  LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
                                  EVT MemVT) const {
    if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
    unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
    unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
           MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
    unsigned Shift = 4 * ExtType;
    return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
  }

  /// Return true if the specified load with extension is legal on this target.
  bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
    return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
  }

  /// Return true if the specified load with extension is legal or custom
  /// on this target.
  bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
    return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
           getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
  }

  /// Return how this store with truncation should be treated: either it is
  /// legal, needs to be promoted to a larger size, needs to be expanded to
  /// some other code sequence, or the target has a custom expander for it.
  LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
    if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
    unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
    unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
    assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
           "Table isn't big enough!");
    return TruncStoreActions[ValI][MemI];
  }

  /// Return true if the specified store with truncation is legal on this
  /// target.
  bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
    return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
  }

  /// Return true if the specified store with truncation is legal or has a
  /// custom lowering on this target.
  bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
    return isTypeLegal(ValVT) &&
           (getTruncStoreAction(ValVT, MemVT) == Legal ||
            getTruncStoreAction(ValVT, MemVT) == Custom);
  }
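  // Example (illustrative, not from the original header): targets configure
  // the tables consulted above with the protected setLoadExtAction() /
  // setTruncStoreAction() hooks, e.g. when no extending i1 load or truncating
  // f64 store exists:
  //
  // \code
  //   for (MVT VT : MVT::integer_valuetypes()) {
  //     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
  //     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
  //   }
  //   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // \endcode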
  /// Return how the indexed load should be treated: either it is legal, needs
  /// to be promoted to a larger size, needs to be expanded to some other code
  /// sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_Load);
  }

  /// Return true if the specified indexed load is legal on this target.
  bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
  }

  /// Return how the indexed store should be treated: either it is legal, needs
  /// to be promoted to a larger size, needs to be expanded to some other code
  /// sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_Store);
  }

  /// Return true if the specified indexed store is legal on this target.
  bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
  }

  /// Return how the indexed masked load should be treated: either it is legal,
  /// needs to be promoted to a larger size, needs to be expanded to some other
  /// code sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
  }

  /// Return true if the specified indexed masked load is legal on this target.
  bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
  }

  /// Return how the indexed masked store should be treated: either it is
  /// legal, needs to be promoted to a larger size, needs to be expanded to
  /// some other code sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
  }

  /// Return true if the specified indexed masked store is legal on this
  /// target.
  bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
  }

  /// Return how the condition code should be treated: either it is legal, needs
  /// to be expanded to some other code sequence, or the target has a custom
  /// expander for it.
  LegalizeAction
  getCondCodeAction(ISD::CondCode CC, MVT VT) const {
    assert((unsigned)CC < array_lengthof(CondCodeActions) &&
           ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
           "Table isn't big enough!");
    // See setCondCodeAction for how this is encoded.
    uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
    uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
    LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
    assert(Action != Promote && "Can't promote condition code!");
    return Action;
  }

  /// Return true if the specified condition code is legal on this target.
  bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
    return getCondCodeAction(CC, VT) == Legal;
  }

  /// Return true if the specified condition code is legal or custom on this
  /// target.
  bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
    return getCondCodeAction(CC, VT) == Legal ||
           getCondCodeAction(CC, VT) == Custom;
  }

  /// If the action for this operation is to promote, this method returns the
  /// ValueType to promote to.
  MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
    assert(getOperationAction(Op, VT) == Promote &&
           "This operation isn't promoted!");

    // See if this has an explicit type specified.
    std::map<std::pair<unsigned, MVT::SimpleValueType>,
             MVT::SimpleValueType>::const_iterator PTTI =
      PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
    if (PTTI != PromoteToType.end()) return PTTI->second;

    assert((VT.isInteger() || VT.isFloatingPoint()) &&
           "Cannot autopromote this type, add it with AddPromotedToType.");

    MVT NVT = VT;
    do {
      NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
      assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
             "Didn't find type to promote to!");
    } while (!isTypeLegal(NVT) ||
             getOperationAction(Op, NVT) == Promote);
    return NVT;
  }

  /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
  /// operations except for the pointer size. If AllowUnknown is true, this
  /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
  /// otherwise it will assert.
  EVT getValueType(const DataLayout &DL, Type *Ty,
                   bool AllowUnknown = false) const {
    // Lower scalar pointers to native pointer types.
    if (auto *PTy = dyn_cast<PointerType>(Ty))
      return getPointerTy(DL, PTy->getAddressSpace());

    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      Type *EltTy = VTy->getElementType();
      // Lower vectors of pointers to native pointer types.
      EVT ElemVT;
      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
        ElemVT = getPointerTy(DL, PTy->getAddressSpace());
      } else {
        ElemVT = EVT::getEVT(EltTy, false);
      }
      assert(!ElemVT.isOverloaded() && "Should not get an overloaded EVT here");
      return EVT::getVectorVT(Ty->getContext(), ElemVT, VTy->getElementCount());
    }

    return EVT::getEVT(Ty, AllowUnknown);
  }
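
  // Illustrative sketch: assuming a DataLayout with 64-bit pointers in
  // address space 0, getValueType maps IR types roughly as follows:
  //
  //   getValueType(DL, Type::getInt32Ty(Ctx))               // MVT::i32
  //   getValueType(DL, Type::getInt8PtrTy(Ctx))             // MVT::i64
  //   getValueType(DL,
  //       FixedVectorType::get(Type::getFloatTy(Ctx), 4))   // MVT::v4f32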

  EVT getMemValueType(const DataLayout &DL, Type *Ty,
                      bool AllowUnknown = false) const {
    // Lower scalar pointers to native pointer types.
    if (PointerType *PTy = dyn_cast<PointerType>(Ty))
      return getPointerMemTy(DL, PTy->getAddressSpace());
    else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
      Type *Elm = VTy->getElementType();
      if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
        EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
        Elm = PointerTy.getTypeForEVT(Ty->getContext());
      }
      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
                              VTy->getElementCount());
    }

    return getValueType(DL, Ty, AllowUnknown);
  }

  /// Return the MVT corresponding to this LLVM type. See getValueType.
  MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
                         bool AllowUnknown = false) const {
    return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
  }

  /// Return the desired alignment for ByVal or InAlloca aggregate function
  /// arguments in the caller parameter area. This is the actual alignment, not
  /// its logarithm.
  virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;

  /// Return the type of registers that this ValueType will eventually require.
  MVT getRegisterType(MVT VT) const {
    assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
    return RegisterTypeForVT[VT.SimpleTy];
  }

  /// Return the type of registers that this ValueType will eventually require.
  MVT getRegisterType(LLVMContext &Context, EVT VT) const {
    if (VT.isSimple()) {
      assert((unsigned)VT.getSimpleVT().SimpleTy <
                array_lengthof(RegisterTypeForVT));
      return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
    }
    if (VT.isVector()) {
      EVT VT1;
      MVT RegisterVT;
      unsigned NumIntermediates;
      (void)getVectorTypeBreakdown(Context, VT, VT1,
                                   NumIntermediates, RegisterVT);
      return RegisterVT;
    }
    if (VT.isInteger()) {
      return getRegisterType(Context, getTypeToTransformTo(Context, VT));
    }
    llvm_unreachable("Unsupported extended type!");
  }

  /// Return the number of registers that this ValueType will eventually
  /// require.
  ///
  /// This is one for any types promoted to live in larger registers, but may be
  /// more than one for types (like i64) that are split into pieces. For types
  /// like i140, which are first promoted then expanded, it is the number of
  /// registers needed to hold all the bits of the original type. For an i140
  /// on a 32 bit machine this means 5 registers.
  unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
    if (VT.isSimple()) {
      assert((unsigned)VT.getSimpleVT().SimpleTy <
                array_lengthof(NumRegistersForVT));
      return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
    }
    if (VT.isVector()) {
      EVT VT1;
      MVT VT2;
      unsigned NumIntermediates;
      return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
    }
    if (VT.isInteger()) {
      unsigned BitWidth = VT.getSizeInBits();
      unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
      return (BitWidth + RegWidth - 1) / RegWidth;
    }
    llvm_unreachable("Unsupported extended type!");
  }
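
  // Worked example for the i140 case above, assuming a 32-bit target:
  // BitWidth = 140 and RegWidth = 32, so getNumRegisters returns
  // (140 + 32 - 1) / 32 = 5 registers.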

  /// Certain combinations of ABIs, Targets and features require that types
  /// are legal for some operations and not for other operations.
  /// For MIPS all vector types must be passed through the integer register set.
  virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                            CallingConv::ID CC, EVT VT) const {
    return getRegisterType(Context, VT);
  }

  /// Certain targets require unusual breakdowns of certain types. For MIPS,
  /// this occurs when a vector type is used, as vectors are passed through the
  /// integer register set.
  virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                                 CallingConv::ID CC,
                                                 EVT VT) const {
    return getNumRegisters(Context, VT);
  }

  /// Certain targets have context sensitive alignment requirements, where one
  /// type has the alignment requirement of another type.
  virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
                                              DataLayout DL) const {
    return DL.getABITypeAlign(ArgTy);
  }

  /// If true, then instruction selection should seek to shrink the FP constant
  /// of the specified type to a smaller type in order to save space and / or
  /// reduce runtime.
  virtual bool ShouldShrinkFPConstant(EVT) const { return true; }

  /// Return true if it is profitable to reduce a load to a smaller type.
  /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x
  virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                                     EVT NewVT) const {
    // By default, assume that it is cheaper to extract a subvector from a wide
    // vector load rather than creating multiple narrow vector loads.
    if (NewVT.isVector() && !Load->hasOneUse())
      return false;

    return true;
  }

  /// When splitting a value of the specified type into parts, does the Lo
  /// or Hi part come first? This usually follows the endianness, except
  /// for ppcf128, where the Hi part always comes first.
  bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
    return DL.isBigEndian() || VT == MVT::ppcf128;
  }

  /// If true, the target has custom DAG combine transformations that it can
  /// perform for the specified node.
  bool hasTargetDAGCombine(ISD::NodeType NT) const {
    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
    return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
  }

  unsigned getGatherAllAliasesMaxDepth() const {
    return GatherAllAliasesMaxDepth;
  }

  /// Returns the size of the platform's va_list object.
  virtual unsigned getVaListSizeInBits(const DataLayout &DL,
                                       unsigned AS) const {
    return getPointerTy(DL, AS).getSizeInBits();
  }
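
  // Illustrative sketch: a target that passes all vectors through integer
  // registers (as MIPS does) might override the calling-convention hooks
  // along these lines (hypothetical override, not any target's actual code):
  //
  //   MVT getRegisterTypeForCallingConv(LLVMContext &C, CallingConv::ID CC,
  //                                     EVT VT) const override {
  //     if (VT.isVector())
  //       return MVT::i64;  // pass vector pieces in 64-bit integer registers
  //     return TargetLoweringBase::getRegisterTypeForCallingConv(C, CC, VT);
  //   }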

  /// Get maximum # of store operations permitted for llvm.memset
  ///
  /// This function returns the maximum number of store operations permitted
  /// to replace a call to llvm.memset. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxStoresPerMemset(bool OptSize) const {
    return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
  }

  /// Get maximum # of store operations permitted for llvm.memcpy
  ///
  /// This function returns the maximum number of store operations permitted
  /// to replace a call to llvm.memcpy. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxStoresPerMemcpy(bool OptSize) const {
    return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
  }

  /// \brief Get maximum # of store operations to be glued together
  ///
  /// This function returns the maximum number of store operations permitted
  /// to glue together during lowering of llvm.memcpy. The value is set by
  /// the target at the performance threshold for such a replacement.
  virtual unsigned getMaxGluedStoresPerMemcpy() const {
    return MaxGluedStoresPerMemcpy;
  }

  /// Get maximum # of load operations permitted for memcmp
  ///
  /// This function returns the maximum number of load operations permitted
  /// to replace a call to memcmp. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
    return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
  }

  /// Get maximum # of store operations permitted for llvm.memmove
  ///
  /// This function returns the maximum number of store operations permitted
  /// to replace a call to llvm.memmove. The value is set by the target at the
  /// performance threshold for such a replacement. If OptSize is true,
  /// return the limit for functions that have OptSize attribute.
  unsigned getMaxStoresPerMemmove(bool OptSize) const {
    return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
  }

  /// Determine if the target supports unaligned memory accesses.
  ///
  /// This function returns true if the target allows unaligned memory accesses
  /// of the specified type in the given address space. If true, it also returns
  /// whether the unaligned memory access is "fast" in the last argument by
  /// reference. This is used, for example, in situations where an array
  /// copy/move/set is converted to a sequence of store operations. Its use
  /// helps to ensure that such replacements don't generate code that causes an
  /// alignment error (trap) on the target machine.
  virtual bool allowsMisalignedMemoryAccesses(
      EVT, unsigned AddrSpace = 0, unsigned Align = 1,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool * /*Fast*/ = nullptr) const {
    return false;
  }
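
  // Illustrative sketch: targets tune these limits in their constructor by
  // assigning the corresponding members of this class (values hypothetical):
  //
  //   MaxStoresPerMemset = 16;        // up to 16 stores per llvm.memset
  //   MaxStoresPerMemsetOptSize = 4;  // tighter limit under OptSize
  //   MaxStoresPerMemcpy = 8;
  //   MaxLoadsPerMemcmp = 4;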

  /// LLT handling variant.
  virtual bool allowsMisalignedMemoryAccesses(
      LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool * /*Fast*/ = nullptr) const {
    return false;
  }

  /// This function returns true if the memory access is aligned or if the
  /// target allows this specific unaligned memory access. If the access is
  /// allowed, the optional final parameter returns if the access is also fast
  /// (as defined by the target).
  bool allowsMemoryAccessForAlignment(
      LLVMContext &Context, const DataLayout &DL, EVT VT,
      unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const;

  /// Return true if the memory access of this type is aligned or if the target
  /// allows this specific unaligned access for the given MachineMemOperand.
  /// If the access is allowed, the optional final parameter returns if the
  /// access is also fast (as defined by the target).
  bool allowsMemoryAccessForAlignment(LLVMContext &Context,
                                      const DataLayout &DL, EVT VT,
                                      const MachineMemOperand &MMO,
                                      bool *Fast = nullptr) const;

  /// Return true if the target supports a memory access of this type for the
  /// given address space and alignment. If the access is allowed, the optional
  /// final parameter returns if the access is also fast (as defined by the
  /// target).
  virtual bool
  allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                     unsigned AddrSpace = 0, Align Alignment = Align(1),
                     MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
                     bool *Fast = nullptr) const;

  /// Return true if the target supports a memory access of this type for the
  /// given MachineMemOperand. If the access is allowed, the optional
  /// final parameter returns if the access is also fast (as defined by the
  /// target).
  bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                          const MachineMemOperand &MMO,
                          bool *Fast = nullptr) const;

  /// Returns the target specific optimal type for load and store operations as
  /// a result of memset, memcpy, and memmove lowering.
  /// It returns MVT::Other if the type should be determined using generic
  /// target-independent logic.
  virtual EVT
  getOptimalMemOpType(const MemOp &Op,
                      const AttributeList & /*FuncAttributes*/) const {
    return MVT::Other;
  }

  /// LLT returning variant.
  virtual LLT
  getOptimalMemOpLLT(const MemOp &Op,
                     const AttributeList & /*FuncAttributes*/) const {
    return LLT();
  }

  /// Returns true if it's safe to use load / store of the specified type to
  /// expand memcpy / memset inline.
  ///
  /// This is mostly true for all types except for some special cases. For
  /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
  /// fstpl which also does type conversion. Note the specified type doesn't
  /// have to be legal as the hook is used before type legalization.
  virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
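
  // Illustrative sketch: a hypothetical target with fast 128-bit vector
  // stores could steer memcpy/memset expansion toward them:
  //
  //   EVT getOptimalMemOpType(const MemOp &Op,
  //                           const AttributeList &) const override {
  //     if (Op.size() >= 16 && Op.isAligned(Align(16)))
  //       return MVT::v4i32;  // copy in 128-bit chunks
  //     return MVT::Other;    // otherwise use generic logic
  //   }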

  /// Return lower limit for number of blocks in a jump table.
  virtual unsigned getMinimumJumpTableEntries() const;

  /// Return lower limit of the density in a jump table.
  unsigned getMinimumJumpTableDensity(bool OptForSize) const;

  /// Return upper limit for number of entries in a jump table.
  /// Zero if no limit.
  unsigned getMaximumJumpTableSize() const;

  virtual bool isJumpTableRelative() const;

  /// Return true if a mulh[s|u] node for a specific type is cheaper than
  /// a multiply followed by a shift. This is false by default.
  virtual bool isMulhCheaperThanMulShift(EVT Type) const { return false; }

  /// If a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
  unsigned getStackPointerRegisterToSaveRestore() const {
    return StackPointerRegisterToSaveRestore;
  }

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  virtual Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const {
    return Register();
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  virtual Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const {
    return Register();
  }

  virtual bool needsFixedCatchObjects() const {
    report_fatal_error("Funclet EH is not implemented for this target");
  }

  /// Return the minimum stack alignment of an argument.
  Align getMinStackArgumentAlignment() const {
    return MinStackArgumentAlignment;
  }

  /// Return the minimum function alignment.
  Align getMinFunctionAlignment() const { return MinFunctionAlignment; }

  /// Return the preferred function alignment.
  Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }

  /// Return the preferred loop alignment.
  virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
    return PrefLoopAlignment;
  }

  /// Should loops be aligned even when the function is marked OptSize (but not
  /// MinSize).
  virtual bool alignLoopsWithOptSize() const {
    return false;
  }

  /// If the target has a standard location for the stack protector guard,
  /// returns the address of that location. Otherwise, returns nullptr.
  /// DEPRECATED: please override useLoadStackGuardNode and customize
  /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
  virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;

  /// Inserts necessary declarations for SSP (stack protection) purpose.
  /// Should be used only when getIRStackGuard returns nullptr.
  virtual void insertSSPDeclarations(Module &M) const;

  /// Return the variable that's previously inserted by insertSSPDeclarations,
  /// if any, otherwise return nullptr. Should be used only when
  /// getIRStackGuard returns nullptr.
  virtual Value *getSDagStackGuard(const Module &M) const;

  /// If this function returns true, stack protection checks should XOR the
  /// frame pointer (or whichever pointer is used to address locals) into the
  /// stack guard value before checking it. getIRStackGuard must return nullptr
  /// if this returns true.
  virtual bool useStackGuardXorFP() const { return false; }

  /// If the target has a standard stack protection check function that
  /// performs validation and error handling, returns the function. Otherwise,
  /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
  /// Should be used only when getIRStackGuard returns nullptr.
  virtual Function *getSSPStackGuardCheck(const Module &M) const;

protected:
  Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
                                            bool UseTLS) const;

public:
  /// Returns the target-specific address of the unsafe stack pointer.
  virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;

  /// Returns the name of the symbol used to emit stack probes or the empty
  /// string if not applicable.
  virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }

  virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }

  virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
    return "";
  }

  /// Returns true if a cast between SrcAS and DestAS is a noop.
  virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
    return false;
  }

  /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
  /// are happy to sink it into basic blocks. A cast may be free, but not
  /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
  virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
    return isNoopAddrSpaceCast(SrcAS, DestAS);
  }

  /// Return true if the pointer arguments to CI should be aligned by aligning
  /// the object whose address is being passed. If so then MinSize is set to the
  /// minimum size the object must be to be aligned and PrefAlign is set to the
  /// preferred alignment.
  virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
                                      unsigned & /*PrefAlign*/) const {
    return false;
  }

  //===--------------------------------------------------------------------===//
  /// \name Helpers for TargetTransformInfo implementations
  /// @{

  /// Get the ISD node that corresponds to the Instruction class opcode.
  int InstructionOpcodeToISD(unsigned Opcode) const;

  /// Estimate the cost of type-legalization and the legalized type.
  std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
                                              Type *Ty) const;

  /// @}

  //===--------------------------------------------------------------------===//
  /// \name Helpers for atomic expansion.
  /// @{

  /// Returns the maximum atomic operation size (in bits) supported by
  /// the backend. Atomic operations greater than this size (as well
  /// as ones that are not naturally aligned) will be expanded by
  /// AtomicExpandPass into an __atomic_* library call.
  unsigned getMaxAtomicSizeInBitsSupported() const {
    return MaxAtomicSizeInBitsSupported;
  }

  /// Returns the size of the smallest cmpxchg or ll/sc instruction
  /// the backend supports. Any smaller operations are widened in
  /// AtomicExpandPass.
  ///
  /// Note that *unlike* operations above the maximum size, atomic ops
  /// are still natively supported below the minimum; they just
  /// require a more complex expansion.
  unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }

  /// Whether the target supports unaligned atomic operations.
  bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }

  /// Whether AtomicExpandPass should automatically insert fences and reduce
  /// ordering for this atomic. This should be true for most architectures with
  /// weak memory ordering. Defaults to false.
  virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
    return false;
  }

  /// Whether the atomic operation \p AI with type \p ValueTy and alignment
  /// \p Alignment via \p PointerTy is natively supported or requires an
  /// __atomic_* libcall.
  virtual bool supportsAtomicOperation(const DataLayout &DL,
                                       const Instruction *AI, Type *ValueTy,
                                       Type *PointerTy, Align Alignment) const;

  /// Perform a load-linked operation on Addr, returning a "Value *" with the
  /// corresponding pointee type. This may entail some non-trivial operations to
  /// truncate or reconstruct types that will be illegal in the backend. See
  /// ARMISelLowering for an example implementation.
  virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                AtomicOrdering Ord) const {
    llvm_unreachable("Load linked unimplemented on this target");
  }

  /// Perform a store-conditional operation to Addr. Return the status of the
  /// store. This should be 0 if the store succeeded, non-zero otherwise.
  virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                      Value *Addr, AtomicOrdering Ord) const {
    llvm_unreachable("Store conditional unimplemented on this target");
  }

  /// Perform a masked atomicrmw using a target-specific intrinsic. This
  /// represents the core LL/SC loop which will be lowered at a late stage by
  /// the backend.
  virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
                                              AtomicRMWInst *AI,
                                              Value *AlignedAddr, Value *Incr,
                                              Value *Mask, Value *ShiftAmt,
                                              AtomicOrdering Ord) const {
    llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
  }
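
  // Illustrative sketch: for a target that returns AtomicExpansionKind::LLSC
  // from shouldExpandAtomicRMWInIR, AtomicExpandPass builds a loop roughly
  // like the following, using emitLoadLinked / emitStoreConditional to
  // produce the two target-specific calls (pseudo-IR, names invented):
  //
  //   loop:
  //     %old   = <emitLoadLinked>(%addr)
  //     %new   = <rmw op> %old, %incr
  //     %fail  = <emitStoreConditional>(%new, %addr)
  //     %retry = icmp ne %fail, 0
  //     br i1 %retry, label %loop, label %done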

  /// Perform a masked cmpxchg using a target-specific intrinsic. This
  /// represents the core LL/SC loop which will be lowered at a late stage by
  /// the backend.
  virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
      IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
      Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
    llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
  }

  /// Inserts in the IR a target-specific intrinsic specifying a fence.
  /// It is called by AtomicExpandPass before expanding an
  /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
  /// if shouldInsertFencesForAtomic returns true.
  ///
  /// Inst is the original atomic instruction, prior to other expansions that
  /// may be performed.
  ///
  /// This function should either return a nullptr, or a pointer to an IR-level
  /// Instruction*. Even complex fence sequences can be represented by a
  /// single Instruction* through an intrinsic to be lowered later.
  /// Backends should override this method to produce target-specific intrinsic
  /// for their fences.
  /// FIXME: Please note that the default implementation here in terms of
  /// IR-level fences exists for historical/compatibility reasons and is
  /// *unsound* ! Fences cannot, in general, be used to restore sequential
  /// consistency. For example, consider the following:
  ///   atomic<int> x = y = 0;
  ///   int r1, r2, r3, r4;
  ///   Thread 0:
  ///     x.store(1);
  ///   Thread 1:
  ///     y.store(1);
  ///   Thread 2:
  ///     r1 = x.load();
  ///     r2 = y.load();
  ///   Thread 3:
  ///     r3 = y.load();
  ///     r4 = x.load();
  /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
  /// seq_cst. But if they are lowered to monotonic accesses, no amount of
  /// IR-level fences can prevent it.
  /// @{
  virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
                                        AtomicOrdering Ord) const {
    if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
      return Builder.CreateFence(Ord);
    else
      return nullptr;
  }

  virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
                                         Instruction *Inst,
                                         AtomicOrdering Ord) const {
    if (isAcquireOrStronger(Ord))
      return Builder.CreateFence(Ord);
    else
      return nullptr;
  }
  /// @}

  // Emits code that executes when the comparison result in the ll/sc
  // expansion of a cmpxchg instruction is such that the store-conditional will
  // not execute. This makes it possible to balance out the load-linked with
  // a dedicated instruction, if desired.
  // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
  // be unnecessarily held, except if clrex, inserted by this hook, is executed.
  virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}

  /// Returns true if the given (atomic) store should be expanded by the
  /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
  virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
    return false;
  }

  /// Returns true if arguments should be sign-extended in lib calls.
  virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
    return IsSigned;
  }

  /// Returns true if arguments should be extended in lib calls.
  virtual bool shouldExtendTypeInLibCall(EVT Type) const {
    return true;
  }

  /// Returns how the given (atomic) load should be expanded by the
  /// IR-level AtomicExpand pass.
  virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
    return AtomicExpansionKind::None;
  }

  /// Returns how the given atomic cmpxchg should be expanded by the IR-level
  /// AtomicExpand pass.
  virtual AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
    return AtomicExpansionKind::None;
  }

  /// Return true if the backend can lower a pointer-type cmpxchg.
  /// Otherwise it will be converted to an integer-type cmpxchg in the IR.
  /// TODO: remove this hook
  virtual bool canLowerPointerTypeCmpXchg(const DataLayout &DL,
                                          AtomicCmpXchgInst *AI) const {
    // Capability-type cmpxchg always needs to use i8 addrspace(200)* instead
    // of converting arguments to integer types.
    return DL.isFatPointer(AI->getCompareOperand()->getType());
  }

  /// Returns how the IR-level AtomicExpand pass should expand the given
  /// AtomicRMW, if at all. Default is to never expand.
  virtual AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
    return RMW->isFloatingPointOperation() ?
      AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
  }

  /// On some platforms, an AtomicRMW that never actually modifies the value
  /// (such as fetch_add of 0) can be turned into a fence followed by an
  /// atomic load. This may sound useless, but it makes it possible for the
  /// processor to keep the cacheline shared, dramatically improving
  /// performance. And such idempotent RMWs are useful for implementing some
  /// kinds of locks, see for example (justification + benchmarks):
  /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
  /// This method tries doing that transformation, returning the atomic load if
  /// it succeeds, and nullptr otherwise.
  /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
  /// another round of expansion.
  virtual LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
    return nullptr;
  }

  /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
  /// SIGN_EXTEND, or ANY_EXTEND).
  virtual ISD::NodeType getExtendForAtomicOps() const {
    return ISD::ZERO_EXTEND;
  }
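
  // Illustrative sketch: a target whose native word-sized operations
  // sign-extend sub-word results (as RISC-V does for 32-bit values on RV64)
  // would override this as:
  //
  //   ISD::NodeType getExtendForAtomicOps() const override {
  //     return ISD::SIGN_EXTEND;
  //   }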

  /// Returns how the platform's atomic compare and swap expects its comparison
  /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
  /// separate from getExtendForAtomicOps, which is concerned with the
  /// sign-extension of the instruction's output, whereas here we are concerned
  /// with the sign-extension of the input. For targets with compare-and-swap
  /// instructions (or sub-word comparisons in their LL/SC loop expansions),
  /// the input can be ANY_EXTEND, but the output will still have a specific
  /// extension.
  virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
    return ISD::ANY_EXTEND;
  }

  /// @}

  /// Returns true if we should normalize
  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
  /// that it saves us from materializing N0 and N1 in an integer register.
  /// Targets that are able to perform and/or on flags should return false here.
  virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
                                               EVT VT) const {
    // If a target has multiple condition registers, then it likely has logical
    // operations on those registers.
    if (hasMultipleConditionRegisters())
      return false;
    // Only do the transform if the value won't be split into multiple
    // registers.
    LegalizeTypeAction Action = getTypeAction(Context, VT);
    return Action != TypeExpandInteger && Action != TypeExpandFloat &&
           Action != TypeSplitVector;
  }

  virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }

  /// Return true if a select of constants (select Cond, C1, C2) should be
  /// transformed into simple math ops with the condition value. For example:
  /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
  virtual bool convertSelectOfConstantsToMath(EVT VT) const {
    return false;
  }

  /// Return true if it is profitable to transform an integer
  /// multiplication-by-constant into simpler operations like shifts and adds.
  /// This may be true if the target does not directly support the
  /// multiplication operation for the specified type or the sequence of simpler
  /// ops is faster than the multiply.
  virtual bool decomposeMulByConstant(LLVMContext &Context,
                                      EVT VT, SDValue C) const {
    return false;
  }

  /// Return true if it is more correct/profitable to use strict FP_TO_INT
  /// conversion operations - canonicalizing the FP source value instead of
  /// converting all cases and then selecting based on value.
  /// This may be true if the target throws exceptions for out of bounds
  /// conversions or has fast FP CMOV.
  virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                        bool IsSigned) const {
    return false;
  }

  //===--------------------------------------------------------------------===//
  // TargetLowering Configuration Methods - These methods should be invoked by
  // the derived class constructor to configure this object for the target.
  //
protected:
  /// Specify how the target extends the result of integer and floating point
  /// boolean values from i1 to a wider type. See getBooleanContents.
  void setBooleanContents(BooleanContent Ty) {
    BooleanContents = Ty;
    BooleanFloatContents = Ty;
  }

  /// Specify how the target extends the result of integer and floating point
  /// boolean values from i1 to a wider type. See getBooleanContents.
  void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
    BooleanContents = IntTy;
    BooleanFloatContents = FloatTy;
  }

  /// Specify how the target extends the result of a vector boolean value from a
  /// vector of i1 to a wider type. See getBooleanContents.
  void setBooleanVectorContents(BooleanContent Ty) {
    BooleanVectorContents = Ty;
  }

  /// Specify the target scheduling preference.
  void setSchedulingPreference(Sched::Preference Pref) {
    SchedPreferenceInfo = Pref;
  }

  /// Indicate the minimum number of blocks to generate jump tables.
  void setMinimumJumpTableEntries(unsigned Val);

  /// Indicate the maximum number of entries in jump tables.
  /// Set to zero to generate unlimited jump tables.
  void setMaximumJumpTableSize(unsigned);

  /// If set to a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
  void setStackPointerRegisterToSaveRestore(Register R) {
    StackPointerRegisterToSaveRestore = R;
  }

  /// Tells the code generator that the target has multiple (allocatable)
  /// condition registers that can be used to store the results of comparisons
  /// for use by selects and conditional branches. With multiple condition
  /// registers, the code generator will not aggressively sink comparisons into
  /// the blocks of their users.
  void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
    HasMultipleConditionRegisters = hasManyRegs;
  }

  /// Tells the code generator that the target has BitExtract instructions.
  /// The code generator will aggressively sink "shift"s into the blocks of
  /// their users if the users will generate "and" instructions which can be
  /// combined with "shift" to BitExtract instructions.
  void setHasExtractBitsInsn(bool hasExtractInsn = true) {
    HasExtractBitsInsn = hasExtractInsn;
  }

  /// Tells the code generator not to expand logic operations on comparison
  /// predicates into separate sequences that increase the amount of flow
  /// control.
  void setJumpIsExpensive(bool isExpensive = true);

  /// Tells the code generator which bitwidths to bypass.
  void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
    BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
  }
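
  // Illustrative sketch: a derived TargetLowering constructor typically wires
  // up these knobs in one place (the register name is hypothetical):
  //
  //   setBooleanContents(ZeroOrOneBooleanContent);
  //   setSchedulingPreference(Sched::RegPressure);
  //   setMinimumJumpTableEntries(4);
  //   setStackPointerRegisterToSaveRestore(MyTarget::SP);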

  /// Add the specified register class as an available regclass for the
  /// specified value type. This indicates the selector can handle values of
  /// that class natively.
  void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
    assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
    RegClassForVT[VT.SimpleTy] = RC;
  }

  /// Return the largest legal super-reg register class of the register class
  /// for the specified type and its associated "cost".
  virtual std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;

  /// Once all of the register classes are added, this allows us to compute
  /// derived properties we expose.
  void computeRegisterProperties(const TargetRegisterInfo *TRI);

  /// Indicate that the specified operation does not work with the specified
  /// type and indicate what to do about it. Note that VT may refer to either
  /// the type of a result or that of an operand of Op.
  void setOperationAction(unsigned Op, MVT VT,
                          LegalizeAction Action) {
    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
    OpActions[(unsigned)VT.SimpleTy][Op] = Action;
  }

  /// Indicate that the specified load with extension does not work with the
  /// specified type and indicate what to do about it.
  void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
                        LegalizeAction Action) {
    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
           MemVT.isValid() && "Table isn't big enough!");
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
    unsigned Shift = 4 * ExtType;
    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
  }

  /// Indicate that the specified truncating store does not work with the
  /// specified type and indicate what to do about it.
  void setTruncStoreAction(MVT ValVT, MVT MemVT,
                           LegalizeAction Action) {
    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
    TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
  }

  /// Indicate that the specified indexed load does or does not work with the
  /// specified type and indicate what to do about it.
  ///
  /// NOTE: All indexed mode loads are initialized to Expand in
  /// TargetLowering.cpp
  void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
    setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
  }

  /// Indicate that the specified indexed store does or does not work with the
  /// specified type and indicate what to do about it.
  ///
  /// NOTE: All indexed mode stores are initialized to Expand in
  /// TargetLowering.cpp
  void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
    setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
  }
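
  // Illustrative sketch: the canonical use of these setters in a target
  // constructor (the choices below are hypothetical):
  //
  //   addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);
  //   computeRegisterProperties(Subtarget.getRegisterInfo());
  //   setOperationAction(ISD::SDIV, MVT::i32, Expand);    // no divide insn
  //   setOperationAction(ISD::SELECT, MVT::i32, Custom);  // lowered in C++
  //   setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i1, Promote);
  //   setTruncStoreAction(MVT::f64, MVT::f32, Expand);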

  /// Indicate that the specified indexed masked load does or does not work with
  /// the specified type and indicate what to do about it.
  ///
  /// NOTE: All indexed mode masked loads are initialized to Expand in
  /// TargetLowering.cpp
  void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
                                  LegalizeAction Action) {
    setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
  }

  /// Indicate that the specified indexed masked store does or does not work
  /// with the specified type and indicate what to do about it.
  ///
  /// NOTE: All indexed mode masked stores are initialized to Expand in
  /// TargetLowering.cpp
  void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
                                   LegalizeAction Action) {
    setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
  }

  /// Indicate that the specified condition code is or isn't supported on the
  /// target and indicate what to do about it.
  void setCondCodeAction(ISD::CondCode CC, MVT VT,
                         LegalizeAction Action) {
    assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
           "Table isn't big enough!");
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
    /// The lower 3 bits of the SimpleTy select the Nth 4-bit group within the
    /// 32-bit value, and the upper 29 bits index into the second dimension of
    /// the array to select which 32-bit value to use.
    uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
    CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
    CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
  }

  /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
  /// to trying a larger integer/fp until it can find one that works. If that
  /// default is insufficient, this method can be used by the target to override
  /// the default.
  void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
    PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
  }

  /// Convenience method to set an operation to Promote and specify the type
  /// in a single call.
  void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
    setOperationAction(Opc, OrigVT, Promote);
    AddPromotedToType(Opc, OrigVT, DestVT);
  }

  /// Targets should invoke this method for each target independent node that
  /// they want to provide a custom DAG combiner for by implementing the
  /// PerformDAGCombine virtual method.
  void setTargetDAGCombine(ISD::NodeType NT) {
    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
    TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
  }

  /// Set the target's minimum function alignment.
  void setMinFunctionAlignment(Align Alignment) {
    MinFunctionAlignment = Alignment;
  }

  /// Set the target's preferred function alignment. This should be set if
  /// there is a performance benefit to higher-than-minimum alignment.
  void setPrefFunctionAlignment(Align Alignment) {
    PrefFunctionAlignment = Alignment;
  }
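
  // Illustrative sketch: promoting an unsupported operation to a wider type
  // in one call (hypothetical target doing i16 multiplies in i32):
  //
  //   setOperationPromotedToType(ISD::MUL, MVT::i16, MVT::i32);
  //   // ...which is equivalent to:
  //   setOperationAction(ISD::MUL, MVT::i16, Promote);
  //   AddPromotedToType(ISD::MUL, MVT::i16, MVT::i32);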

  /// Set the target's preferred loop alignment. The default alignment is one,
  /// meaning the target does not care about loop alignment. The target may also
  /// override getPrefLoopAlignment to provide per-loop values.
  void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }

  /// Set the minimum stack alignment of an argument.
  void setMinStackArgumentAlignment(Align Alignment) {
    MinStackArgumentAlignment = Alignment;
  }

  /// Set the maximum atomic operation size supported by the
  /// backend. Atomic operations greater than this size (as well as
  /// ones that are not naturally aligned) will be expanded by
  /// AtomicExpandPass into an __atomic_* library call.
  void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
    MaxAtomicSizeInBitsSupported = SizeInBits;
  }

  /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
  void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
    MinCmpXchgSizeInBits = SizeInBits;
  }

  /// Sets whether unaligned atomic operations are supported.
  void setSupportsUnalignedAtomics(bool UnalignedSupported) {
    SupportsUnalignedAtomics = UnalignedSupported;
  }

public:
  //===--------------------------------------------------------------------===//
  // Addressing mode description hooks (used by LSR etc).
  //

  /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
  /// instructions reading the address. This allows as much computation as
  /// possible to be done in the address mode for that operand. This hook lets
  /// targets also pass back when this should be done on intrinsics which
  /// load/store.
  virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
                                    SmallVectorImpl<Value*> &/*Ops*/,
                                    Type *&/*AccessTy*/) const {
    return false;
  }

  /// This represents an addressing mode of:
  ///    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  /// If BaseGV is null, there is no BaseGV.
  /// If BaseOffs is zero, there is no base offset.
  /// If HasBaseReg is false, there is no base register.
  /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
  /// no scale.
  struct AddrMode {
    GlobalValue *BaseGV = nullptr;
    int64_t BaseOffs = 0;
    bool HasBaseReg = false;
    int64_t Scale = 0;
    AddrMode() = default;
  };

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  ///
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type. TODO: Handle
  /// pre/postinc as well.
  ///
  /// If the address space cannot be determined, it will be -1.
  ///
  /// TODO: Remove default argument
  virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                                     Type *Ty, unsigned AddrSpace,
                                     Instruction *I = nullptr) const;
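
  // Illustrative sketch: an x86-style address such as [BaseReg + 4*IndexReg
  // + 16] would be queried like this (Int32Ty and TLI assumed in scope):
  //
  //   TargetLoweringBase::AddrMode AM;
  //   AM.BaseOffs = 16;
  //   AM.HasBaseReg = true;  // the base register
  //   AM.Scale = 4;          // 4 * index register
  //   bool OK = TLI.isLegalAddressingMode(DL, AM, Int32Ty, /*AddrSpace=*/0);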

  /// Return the cost of the scaling factor used in the addressing mode
  /// represented by AM for this target, for a load/store of the specified type.
  ///
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  /// TODO: Remove default argument
  virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                   Type *Ty, unsigned AS = 0) const {
    // Default: assume that any scaling factor used in a legal AM is free.
    if (isLegalAddressingMode(DL, AM, Ty, AS))
      return 0;
    return -1;
  }

  /// Return true if the specified immediate is a legal icmp immediate, that is
  /// the target has icmp instructions which can compare a register against the
  /// immediate without having to materialize the immediate into a register.
  virtual bool isLegalICmpImmediate(int64_t) const {
    return true;
  }

  /// Return true if the specified immediate is a legal add immediate, that is
  /// the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  virtual bool isLegalAddImmediate(int64_t) const {
    return true;
  }

  /// Return true if the specified immediate is legal for the value input of a
  /// store instruction.
  virtual bool isLegalStoreImmediate(int64_t Value) const {
    // Default implementation assumes that at least 0 works since it is likely
    // that a zero register exists or a zero immediate is allowed.
    return Value == 0;
  }

  /// Return true if it's significantly cheaper to shift a vector by a uniform
  /// scalar than by an amount which will vary across each lane. On x86 before
  /// AVX2 for example, there is a "psllw" instruction for the former case, but
  /// no simple instruction for a general "a << b" operation on vectors.
  /// This should also apply to lowering for vector funnel shifts (rotates).
  virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
    return false;
  }

  /// Given a shuffle vector SVI representing a vector splat, return a new
  /// scalar type of size equal to SVI's scalar type if the new type is more
  /// profitable. Returns nullptr otherwise. For example under MVE float splats
  /// are converted to integer to prevent the need to move from SPR to GPR
  /// registers.
  virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const {
    return nullptr;
  }

  /// Given a set of interconnected phis of type 'From' that are loaded/stored
  /// or bitcast to type 'To', return true if the set should be converted to
  /// 'To'.
  virtual bool shouldConvertPhiType(Type *From, Type *To) const {
    return (From->isIntegerTy() || From->isFloatingPointTy()) &&
           (To->isIntegerTy() || To->isFloatingPointTy());
  }

  /// Returns true if the opcode is a commutative binary operation.
  virtual bool isCommutativeBinOp(unsigned Opcode) const {
    // FIXME: This should get its info from the td file.
    switch (Opcode) {
    case ISD::ADD:
    case ISD::SMIN:
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
    case ISD::MUL:
    case ISD::MULHU:
    case ISD::MULHS:
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:
    case ISD::FADD:
    case ISD::FMUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::SADDO:
    case ISD::UADDO:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::SADDSAT:
    case ISD::UADDSAT:
    case ISD::FMINNUM:
    case ISD::FMAXNUM:
    case ISD::FMINNUM_IEEE:
    case ISD::FMAXNUM_IEEE:
    case ISD::FMINIMUM:
    case ISD::FMAXIMUM:
      return true;
    default: return false;
    }
  }

  /// Return true if the node is a math/logic binary operator.
  virtual bool isBinOp(unsigned Opcode) const {
    // A commutative binop must be a binop.
    if (isCommutativeBinOp(Opcode))
      return true;
    // These are non-commutative binops.
    switch (Opcode) {
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRL:
    case ISD::SRA:
    case ISD::SDIV:
    case ISD::UDIV:
    case ISD::SREM:
    case ISD::UREM:
    case ISD::FSUB:
    case ISD::FDIV:
    case ISD::FREM:
      return true;
    default:
      return false;
    }
  }

  /// Return true if it's free to truncate a value of type FromTy to type
  /// ToTy. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  /// Targets must return false when FromTy <= ToTy.
  virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
    return false;
  }

  /// Return true if a truncation from FromTy to ToTy is permitted when deciding
  /// whether a call is in tail position. Typically this means that both results
  /// would be assigned to the same register or stack slot, but it could mean
  /// the target performs adequate checks of its own before proceeding with the
  /// tail call. Targets must return false when FromTy <= ToTy.
  virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
    return false;
  }

  virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
    return false;
  }

  virtual bool isProfitableToHoist(Instruction *I) const { return true; }

  /// Return true if the extension represented by \p I is free.
  /// Unlike the is[Z|FP]ExtFree family which is based on types,
  /// this method can use the context provided by \p I to decide
  /// whether or not \p I is free.
  /// This method extends the behavior of the is[Z|FP]ExtFree family.
  /// In other words, if is[Z|FP]ExtFree returns true, then this method
  /// returns true as well. The converse is not true.
  /// The target can perform the adequate checks by overriding isExtFreeImpl.
  /// \pre \p I must be a sign, zero, or fp extension.
  bool isExtFree(const Instruction *I) const {
    switch (I->getOpcode()) {
    case Instruction::FPExt:
      if (isFPExtFree(EVT::getEVT(I->getType()),
                      EVT::getEVT(I->getOperand(0)->getType())))
        return true;
      break;
    case Instruction::ZExt:
      if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
        return true;
      break;
    case Instruction::SExt:
      break;
    default:
      llvm_unreachable("Instruction is not an extension");
    }
    return isExtFreeImpl(I);
  }

  /// Return true if \p Load and \p Ext can form an ExtLoad.
  /// For example, on AArch64
  ///   %L = load i8, i8* %ptr
  ///   %E = zext i8 %L to i32
  /// can be lowered into one load instruction
  ///   ldrb w0, [x0]
  bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
                 const DataLayout &DL) const {
    EVT VT = getValueType(DL, Ext->getType());
    EVT LoadVT = getValueType(DL, Load->getType());

    // If the load has other users and the truncate is not free, the ext
    // probably isn't free.
    if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
        !isTruncateFree(Ext->getType(), Load->getType()))
      return false;

    // Check whether the target supports casts folded into loads.
    unsigned LType;
    if (isa<ZExtInst>(Ext))
      LType = ISD::ZEXTLOAD;
    else {
      assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
      LType = ISD::SEXTLOAD;
    }

    return isLoadExtLegal(LType, VT, LoadVT);
  }

  /// Return true if any actual instruction that defines a value of type
  /// FromTy implicitly zero-extends the value to ToTy in the result register.
  ///
  /// The function should return true when it is likely that the truncate can
  /// be freely folded with an instruction defining a value of FromTy. If
  /// the defining instruction is unknown (because you're looking at a
  /// function argument, PHI, etc.) then the target may require an
  /// explicit truncate, which is not necessarily free, but this function
  /// does not deal with those cases.
  /// Targets must return false when FromTy >= ToTy.
  virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
    return false;
  }

  virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
    return false;
  }

  /// Return true if sign-extension from FromTy to ToTy is cheaper than
  /// zero-extension.
  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
    return false;
  }

  /// Return true if sinking I's operands to the same basic block as I is
  /// profitable, e.g. because the operands can be folded into a target
  /// instruction during instruction selection. After calling the function
  /// \p Ops contains the Uses to sink ordered by dominance (dominating users
  /// come first).
  virtual bool shouldSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const {
    return false;
  }

  /// Return true if the target supplies and combines to a paired load
  /// two loaded values of type LoadedType next to each other in memory.
  /// RequiredAlignment gives the minimal alignment constraints that must be
  /// met to be able to select this paired load.
  ///
  /// This information is *not* used to generate actual paired loads, but it
  /// is used to generate a sequence of loads that is easier to combine into
  /// a paired load.
  /// For instance, something like this:
  ///   a = load i64* addr
  ///   b = trunc i64 a to i32
  ///   c = lshr i64 a, 32
  ///   d = trunc i64 c to i32
  /// will be optimized into:
  ///   b = load i32* addr1
  ///   d = load i32* addr2
  /// where addr1 = addr2 +/- sizeof(i32).
  ///
  /// In other words, unless the target performs a post-isel load combining,
  /// this information should not be provided because it will generate more
  /// loads.
  virtual bool hasPairedLoad(EVT /*LoadedType*/,
                             Align & /*RequiredAlignment*/) const {
    return false;
  }

  /// Return true if the target has a vector blend instruction.
  virtual bool hasVectorBlend() const { return false; }

  /// Get the maximum supported factor for interleaved memory accesses.
  /// Default to be the minimum interleave factor: 2.
  virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }

  /// Lower an interleaved load to target specific intrinsics. Return
  /// true on success.
  ///
  /// \p LI is the vector load instruction.
  /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
  /// \p Indices is the corresponding indices for each shufflevector.
  /// \p Factor is the interleave factor.
  virtual bool lowerInterleavedLoad(LoadInst *LI,
                                    ArrayRef<ShuffleVectorInst *> Shuffles,
                                    ArrayRef<unsigned> Indices,
                                    unsigned Factor) const {
    return false;
  }

  /// Lower an interleaved store to target specific intrinsics. Return
  /// true on success.
  ///
  /// \p SI is the vector store instruction.
  /// \p SVI is the shufflevector to RE-interleave the stored vector.
  /// \p Factor is the interleave factor.
  virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                     unsigned Factor) const {
    return false;
  }

  /// Return true if zero-extending the specific node Val to type VT2 is free
  /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
  /// because it's folded such as X86 zero-extending loads).
  virtual bool isZExtFree(SDValue Val, EVT VT2) const {
    return isZExtFree(Val.getValueType(), VT2);
  }

  /// Return true if an fpext operation is free (for instance, because
  /// single-precision floating-point numbers are implicitly extended to
  /// double precision).
  virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
    assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
           "invalid fpext types");
    return false;
  }

  /// Return true if an fpext operation input to an \p Opcode operation is
  /// free (for instance, because half-precision floating-point numbers are
  /// implicitly extended to single precision) for an FMA instruction.
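  /// For example, when this hook allows the fold, an IR pair such as
  ///   %e = fpext half %x to float
  ///   %r = call float @llvm.fmuladd.f32(float %e, float %y, float %z)
  /// can select to a single widening fused multiply-add (illustrative only;
  /// whether this is profitable is entirely target-specific).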
  virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
                               EVT DestVT, EVT SrcVT) const {
    assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
           "invalid fpext types");
    return isFPExtFree(DestVT, SrcVT);
  }

  /// Return true if folding a vector load into ExtVal (a sign, zero, or any
  /// extend node) is profitable.
  virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }

  /// Return true if an fneg operation is free to the point where it is never
  /// worthwhile to replace it with a bitwise operation.
  virtual bool isFNegFree(EVT VT) const {
    assert(VT.isFloatingPoint());
    return false;
  }

  /// Return true if an fabs operation is free to the point where it is never
  /// worthwhile to replace it with a bitwise operation.
  virtual bool isFAbsFree(EVT VT) const {
    assert(VT.isFloatingPoint());
    return false;
  }

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  ///
  /// NOTE: This may be called before legalization on types for which FMAs
  /// are not legal, but should return true if those types will eventually
  /// legalize to types that support FMAs. After legalization, it will only
  /// be called on types that support FMAs (via Legal or Custom actions).
  virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                          EVT) const {
    return false;
  }

  /// IR version
  virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
    return false;
  }

  /// Returns true if \p N can be combined with another node to form an
  /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
  /// will be distributed into an fadd/fsub.
  virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
    assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
            N->getOpcode() == ISD::FMUL) &&
           "unexpected node in FMAD forming combine");
    return isOperationLegal(ISD::FMAD, N->getValueType(0));
  }

  /// Return true if it's profitable to narrow operations of type VT1 to
  /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
  /// i32 to i16.
  virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const {
    return false;
  }

  /// Return true if it is beneficial to convert a load of a constant to
  /// just the constant itself.
  /// On some targets it might be more efficient to use a combination of
  /// arithmetic instructions to materialize the constant instead of loading
  /// it from a constant pool.
  virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                 Type *Ty) const {
    return false;
  }

  /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result
  /// type from this source type with this index.
  /// This is needed because EXTRACT_SUBVECTOR usually has custom lowering
  /// that depends on the index of the first element, and only the target
  /// knows which lowering is cheap.
  virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                       unsigned Index) const {
    return false;
  }

  /// Try to convert an extract element of a vector binary operation into an
  /// extract element followed by a scalar operation.
  virtual bool shouldScalarizeBinop(SDValue VecOp) const {
    return false;
  }

  /// Return true if extraction of a scalar element from the given vector
  /// type at the given index is cheap. For example, if scalar operations
  /// occur on the same register file as vector operations, then an extract
  /// element may be a sub-register rename rather than an actual instruction.
  virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
    return false;
  }

  /// Try to convert math with an overflow comparison into the corresponding
  /// DAG node operation. Targets may want to override this independently of
  /// whether the operation is legal/custom for the given type because it may
  /// obscure matching of other patterns.
  virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                                    bool MathUsed) const {
    // TODO: The default logic is inherited from code in CodeGenPrepare.
    // The opcode should not make a difference by default?
    if (Opcode != ISD::UADDO)
      return false;

    // Allow the transform as long as we have an integer type that is not
    // obviously illegal and unsupported, and only if the math result is used
    // besides the overflow check. On some targets (e.g. SPARC), it is
    // not profitable to form an overflow op if the math result has no
    // concrete users.
    if (VT.isVector())
      return false;
    return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
  }

  // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
  // even if the vector itself has multiple uses.
  virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
    return false;
  }

  // Return true if CodeGenPrepare should consider splitting large offset of
  // a GEP to make the GEP fit into the addressing mode and can be sunk into
  // the same blocks of its users.
  virtual bool shouldConsiderGEPOffsetSplit() const { return false; }

  /// Return true if creating a shift of the type by the given
  /// amount is not profitable.
  virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
    return false;
  }

  // Return true if the target has a capability set address instruction.
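  // (For instance, CHERI targets provide a CSetAddr-style instruction that
  // updates a capability's address while leaving its bounds and permissions
  // intact.)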
  virtual bool hasCapabilitySetAddress() const { return false; }
  MVT cheriCapabilityType() const { return CapType; }
  bool cheriCapabilityTypeHasPreciseBounds() const {
    return CapTypeHasPreciseBounds;
  }
  bool supportsUnalignedCapabilityMemOps() const {
    return SupportsUnalignedCapabilityMemOps;
  }
  virtual TailPaddingAmount
  getTailPaddingForPreciseBounds(uint64_t Size) const {
    return TailPaddingAmount::None;
  }
  virtual Align getAlignmentForPreciseBounds(uint64_t Size) const {
    return Align();
  }
  bool supportsAtomicCapabilityOperations() const {
    return SupportsAtomicCapabilityOperations;
  }
  Register getNullCapabilityRegister() const {
    return NullCapabilityRegister;
  }

  //===--------------------------------------------------------------------===//
  // Runtime Library hooks
  //

  /// Rename the default libcall routine name for the specified libcall.
  void setLibcallName(RTLIB::Libcall Call, const char *Name) {
    LibcallRoutineNames[Call] = Name;
  }

  /// Get the libcall routine name for the specified libcall.
  const char *getLibcallName(RTLIB::Libcall Call) const {
    return LibcallRoutineNames[Call];
  }

  /// Override the default CondCode to be used to test the result of the
  /// comparison libcall against zero.
  void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
    CmpLibcallCCs[Call] = CC;
  }

  /// Get the CondCode that's to be used to test the result of the comparison
  /// libcall against zero.
  ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
    return CmpLibcallCCs[Call];
  }

  /// Set the CallingConv that should be used for the specified libcall.
  void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
    LibcallCallingConvs[Call] = CC;
  }

  /// Get the CallingConv that should be used for the specified libcall.
  CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
    return LibcallCallingConvs[Call];
  }

  /// Execute target specific actions to finalize target lowering.
  /// This is used to set extra flags in MachineFrameInformation and to
  /// freeze the set of reserved registers.
  /// The default implementation just freezes the set of reserved registers.
  virtual void finalizeLowering(MachineFunction &MF) const;

  //===----------------------------------------------------------------------===//
  //  GlobalISel Hooks
  //===----------------------------------------------------------------------===//
  /// Check whether or not \p MI needs to be moved close to its uses.
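  ///
  /// A minimal override sketch for a hypothetical target (MyTargetLowering
  /// is an assumed class name; the base implementation is usually adequate):
  ///   bool MyTargetLowering::shouldLocalize(
  ///       const MachineInstr &MI, const TargetTransformInfo *TTI) const {
  ///     // Keep constant materializations next to their users.
  ///     if (MI.getOpcode() == TargetOpcode::G_CONSTANT)
  ///       return true;
  ///     return TargetLoweringBase::shouldLocalize(MI, TTI);
  ///   }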
  virtual bool shouldLocalize(const MachineInstr &MI,
                              const TargetTransformInfo *TTI) const;

private:
  const TargetMachine &TM;

  /// Tells the code generator that the target has multiple (allocatable)
  /// condition registers that can be used to store the results of comparisons
  /// for use by selects and conditional branches. With multiple condition
  /// registers, the code generator will not aggressively sink comparisons
  /// into the blocks of their users.
  bool HasMultipleConditionRegisters;

  /// Tells the code generator that the target has BitExtract instructions.
  /// The code generator will aggressively sink "shift"s into the blocks of
  /// their users if the users will generate "and" instructions which can be
  /// combined with "shift" to BitExtract instructions.
  bool HasExtractBitsInsn;

  /// Tells the code generator to bypass slow divide or remainder
  /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
  /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned
  /// integer div/rem when the operands are positive and less than 256.
  DenseMap<unsigned int, unsigned int> BypassSlowDivWidths;

  /// Tells the code generator that it shouldn't generate extra flow control
  /// instructions and should attempt to combine flow control instructions
  /// via predication.
  bool JumpIsExpensive;

  /// Information about the contents of the high-bits in boolean values held
  /// in a type wider than i1. See getBooleanContents.
  BooleanContent BooleanContents;

  /// Information about the contents of the high-bits in boolean values held
  /// in a type wider than i1. See getBooleanContents.
  BooleanContent BooleanFloatContents;

  /// Information about the contents of the high-bits in boolean vector
  /// values when the element type is wider than i1. See getBooleanContents.
  BooleanContent BooleanVectorContents;

  /// The target scheduling preference: shortest possible total cycles or
  /// lowest register usage.
  Sched::Preference SchedPreferenceInfo;

  /// The minimum alignment that any argument on the stack needs to have.
  Align MinStackArgumentAlignment;

  /// The minimum function alignment (used when optimizing for size, and to
  /// prevent explicitly provided alignment from leading to incorrect code).
  Align MinFunctionAlignment;

  /// The preferred function alignment (used when alignment unspecified and
  /// optimizing for speed).
  Align PrefFunctionAlignment;

  /// The preferred loop alignment (in log2, not in bytes).
  Align PrefLoopAlignment;

  /// Size in bits of the maximum atomics size the backend supports.
  /// Accesses larger than this will be expanded by AtomicExpandPass.
  unsigned MaxAtomicSizeInBitsSupported;

  /// Size in bits of the minimum cmpxchg or ll/sc operation the
  /// backend supports.
  unsigned MinCmpXchgSizeInBits;

  /// This indicates if the target supports unaligned atomic operations.
  bool SupportsUnalignedAtomics;

  /// If set to a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
  Register StackPointerRegisterToSaveRestore;

  /// This indicates the default register class to use for each ValueType
  /// the target supports natively.
  const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
  uint16_t NumRegistersForVT[MVT::LAST_VALUETYPE];
  MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];

  /// This indicates the "representative" register class to use for each
  /// ValueType the target supports natively. This information is used by the
  /// scheduler to track register pressure. By default, the representative
  /// register class is the largest legal super-reg register class of the
  /// register class of the specified type. e.g. On x86, i8, i16, and i32's
  /// representative class would be GR32.
  const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE];

  /// This indicates the "cost" of the "representative" register class for
  /// each ValueType. The cost is used by the scheduler to approximate
  /// register pressure.
  uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];

  /// For any value types we are promoting or expanding, this contains the
  /// value type that we are changing to. For Expanded types, this contains
  /// one step of the expand (e.g. i64 -> i32), even if there are multiple
  /// steps required (e.g. i64 -> i16). For types natively supported by the
  /// system, this holds the same type (e.g. i32 -> i32).
  MVT TransformToType[MVT::LAST_VALUETYPE];

  /// For each operation and each value type, keep a LegalizeAction that
  /// indicates how instruction selection should deal with the operation.
  /// Most operations are Legal (aka, supported natively by the target), but
  /// operations that are not should be described. Note that operations on
  /// non-legal value types are not described here.
  LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];

  /// For each load extension type and each value type, keep a LegalizeAction
  /// that indicates how instruction selection should deal with a load of a
  /// specific value type and extension type. Uses 4-bits to store the action
  /// for each of the 4 load ext types.
  uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];

  /// For each value type pair keep a LegalizeAction that indicates whether a
  /// truncating store of a specific value type and truncating type is legal.
  LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];

  /// For each indexed mode and each value type, keep a quad of
  /// LegalizeAction that indicates how instruction selection should deal
  /// with the load / store / maskedload / maskedstore.
  ///
  /// The first dimension is the value_type for the reference. The second
  /// dimension represents the various modes for load store.
  uint16_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];

  /// For each condition code (ISD::CondCode) keep a LegalizeAction that
  /// indicates how instruction selection should deal with the condition
  /// code.
  ///
  /// Because each CC action takes up 4 bits, we need to have the array size
  /// be large enough to fit all of the value types. This can be done by
  /// rounding up the MVT::LAST_VALUETYPE value to the next multiple of 8.
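  ///
  /// As an illustration of the layout described above: with 4 bits per
  /// action, one uint32_t word covers 8 value types, so the action for value
  /// type VT under condition code CC would be recovered as
  ///   (CondCodeActions[CC][VT.SimpleTy >> 3] >> (4 * (VT.SimpleTy & 0x7)))
  /// masked to 4 bits.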
  uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];

  ValueTypeActionImpl ValueTypeActions;

private:
  LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;

  /// Targets can specify ISD nodes that they would like PerformDAGCombine
  /// callbacks for by calling setTargetDAGCombine(), which sets a bit in
  /// this array.
  unsigned char
      TargetDAGCombineArray[(ISD::BUILTIN_OP_END + CHAR_BIT - 1) / CHAR_BIT];

  /// For operations that must be promoted to a specific type, this holds the
  /// destination type. This map should be sparse, so don't hold it as an
  /// array.
  ///
  /// Targets add entries to this map with AddPromotedToType(..), clients
  /// access this with getTypeToPromoteTo(..).
  std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
      PromoteToType;

  /// Stores the name of each libcall.
  const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1];

  /// The ISD::CondCode that should be used to test the result of each of the
  /// comparison libcall against zero.
  ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];

  /// Stores the CallingConv that should be used for each libcall.
  CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];

  /// Set default libcall names and calling conventions.
  void InitLibcalls(const Triple &TT);

  /// The bits of IndexedModeActions used to store the legalisation actions.
  /// We store the data as | ML | MS | L | S |, each field taking 4 bits.
  enum IndexedModeActionsBits {
    IMAB_Store = 0,
    IMAB_Load = 4,
    IMAB_MaskedStore = 8,
    IMAB_MaskedLoad = 12
  };

  void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
                            LegalizeAction Action) {
    assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
           (unsigned)Action < 0xf && "Table isn't big enough!");
    unsigned Ty = (unsigned)VT.SimpleTy;
    IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
    IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
  }

  LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
                                      unsigned Shift) const {
    assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
           "Table isn't big enough!");
    unsigned Ty = (unsigned)VT.SimpleTy;
    return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
  }

protected:
  /// Return true if the extension represented by \p I is free.
  /// \pre \p I is a sign, zero, or fp extension and
  /// is[Z|FP]ExtFree of the related types is not true.
  virtual bool isExtFreeImpl(const Instruction *I) const { return false; }

  /// Depth that GatherAllAliases should continue looking for chain
  /// dependencies when trying to find a more preferable chain. As an
  /// approximation, this should be more than the number of consecutive
  /// stores expected to be merged.
  unsigned GatherAllAliasesMaxDepth;

  /// \brief Specify maximum number of store instructions per memset call.
  ///
  /// When lowering \@llvm.memset this field specifies the maximum number of
  /// store operations that may be substituted for the call to memset.
  /// Targets must set this value based on the cost threshold for that
  /// target. Targets should assume that the memset will be done using as
  /// many of the largest store operations first, followed by smaller ones,
  /// if necessary, per alignment restrictions. For example, storing 9 bytes
  /// on a 32-bit machine with 16-bit alignment would result in four 2-byte
  /// stores and one 1-byte store. This only applies to setting a constant
  /// array of a constant size.
  unsigned MaxStoresPerMemset;
  /// Likewise for functions with the OptSize attribute.
  unsigned MaxStoresPerMemsetOptSize;

  /// \brief Specify maximum number of store instructions per memcpy call.
  ///
  /// When lowering \@llvm.memcpy this field specifies the maximum number of
  /// store operations that may be substituted for a call to memcpy. Targets
  /// must set this value based on the cost threshold for that target.
  /// Targets should assume that the memcpy will be done using as many of
  /// the largest store operations first, followed by smaller ones, if
  /// necessary, per alignment restrictions. For example, storing 7 bytes on
  /// a 32-bit machine with 32-bit alignment would result in one 4-byte
  /// store, one 2-byte store and one 1-byte store. This only applies to
  /// copying a constant array of constant size.
  unsigned MaxStoresPerMemcpy;
  /// Likewise for functions with the OptSize attribute.
  unsigned MaxStoresPerMemcpyOptSize;

  /// \brief Specify max number of store instructions to glue in inlined
  /// memcpy.
  ///
  /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum
  /// number of store instructions to keep together. This helps in pairing
  /// and vectorization later on.
  unsigned MaxGluedStoresPerMemcpy = 0;

  /// \brief Specify maximum number of load instructions per memcmp call.
  ///
  /// When lowering \@llvm.memcmp this field specifies the maximum number of
  /// pairs of load operations that may be substituted for a call to memcmp.
  /// Targets must set this value based on the cost threshold for that
  /// target. Targets should assume that the memcmp will be done using as
  /// many of the largest load operations first, followed by smaller ones,
  /// if necessary, per alignment restrictions. For example, loading 7 bytes
  /// on a 32-bit machine with 32-bit alignment would result in one 4-byte
  /// load, one 2-byte load and one 1-byte load. This only applies to
  /// copying a constant array of constant size.
  unsigned MaxLoadsPerMemcmp;
  /// Likewise for functions with the OptSize attribute.
  unsigned MaxLoadsPerMemcmpOptSize;

  /// \brief Specify maximum number of store instructions per memmove call.
  ///
  /// When lowering \@llvm.memmove this field specifies the maximum number of
  /// store instructions that may be substituted for a call to memmove.
  /// Targets must set this value based on the cost threshold for that
  /// target. Targets should assume that the memmove will be done using as
  /// many of the largest store operations first, followed by smaller ones,
  /// if necessary, per alignment restrictions. For example, moving 9 bytes
  /// on a 32-bit machine with 8-bit alignment would result in nine 1-byte
  /// stores. This only applies to copying a constant array of constant size.
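  ///
  /// Targets typically seed all of these thresholds in their TargetLowering
  /// constructor; a sketch with made-up values:
  ///   MaxStoresPerMemset = 8;   MaxStoresPerMemsetOptSize = 4;
  ///   MaxStoresPerMemcpy = 4;   MaxStoresPerMemcpyOptSize = 2;
  ///   MaxStoresPerMemmove = 4;  MaxStoresPerMemmoveOptSize = 2;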
  unsigned MaxStoresPerMemmove;
  /// Likewise for functions with the OptSize attribute.
  unsigned MaxStoresPerMemmoveOptSize;

  /// Tells the code generator that select is more expensive than a branch if
  /// the branch is usually predicted right.
  bool PredictableSelectIsExpensive;

  /// \see enableExtLdPromotion.
  bool EnableExtLdPromotion;

  /// The type to use for CHERI capabilities (if supported).
  /// Should be one of iFATPTR64/128/256.
  MVT CapType = MVT();

  /// Whether the CHERI capability type supports precise bounds for any
  /// allocation. Defaults to false for safety over efficiency.
  bool CapTypeHasPreciseBounds = false;

  /// Whether CHERI capability loads/stores can be used with unaligned
  /// addresses. This makes it possible to do a tag-preserving copy even if
  /// the alignment is not statically known to be at least capability
  /// aligned.
  bool SupportsUnalignedCapabilityMemOps = false;

  /// Whether atomic operations with CHERI capability values are supported.
  bool SupportsAtomicCapabilityOperations = false;

  /// Set if the target has a NULL capability register (e.g. Mips::CNULL).
  Register NullCapabilityRegister = {};

  /// Return true if the value types that can be represented by the specified
  /// register class are all legal.
  bool isLegalRC(const TargetRegisterInfo &TRI,
                 const TargetRegisterClass &RC) const;

  /// Replace/modify any TargetFrameIndex operands with a target-dependent
  /// sequence of memory operands that is recognized by PrologEpilogInserter.
  MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const;

  /// Replace/modify the XRay custom event operands with target-dependent
  /// details.
  MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

  /// Replace/modify the XRay typed event operands with target-dependent
  /// details.
  MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

  bool IsStrictFPEnabled;
};

/// This class defines information used to lower LLVM code to legal
/// SelectionDAG operators that the target instruction selector can accept
/// natively.
///
/// This class also defines callbacks that targets must implement to lower
/// target-specific constructs to SelectionDAG operators.
class TargetLowering : public TargetLoweringBase {
public:
  struct DAGCombinerInfo;
  struct MakeLibCallOptions;

  TargetLowering(const TargetLowering &) = delete;
  TargetLowering &operator=(const TargetLowering &) = delete;

  explicit TargetLowering(const TargetMachine &TM);

  bool isPositionIndependent() const;

  virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
                                          FunctionLoweringInfo *FLI,
                                          LegacyDivergenceAnalysis *DA) const {
    return false;
  }

  virtual bool isSDNodeAlwaysUniform(const SDNode *N) const {
    return false;
  }

  /// Returns true by value, base pointer and offset pointer and addressing
  /// mode by reference if the node's address can be legally represented as
  /// pre-indexed load / store address.
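  ///
  /// For instance, on an ARM-like target the address of
  ///   (store x, (add base, 4))
  /// can be expressed pre-indexed as Base = base, Offset = 4 with a
  /// pre-increment addressing mode (cf. "str r0, [r1, #4]!"), letting the
  /// add and the store fold into one instruction (illustrative only).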
  virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue & /*Base*/,
                                         SDValue & /*Offset*/,
                                         ISD::MemIndexedMode & /*AM*/,
                                         SelectionDAG & /*DAG*/) const {
    return false;
  }

  /// Returns true by value, base pointer and offset pointer and addressing
  /// mode by reference if this node can be combined with a load / store to
  /// form a post-indexed load / store.
  virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
                                          SDValue & /*Base*/,
                                          SDValue & /*Offset*/,
                                          ISD::MemIndexedMode & /*AM*/,
                                          SelectionDAG & /*DAG*/) const {
    return false;
  }

  /// Returns true if the specified base+offset is a legal indexed addressing
  /// mode for this target. \p MI is the load or store instruction that is
  /// being considered for transformation.
  virtual bool isIndexingLegal(MachineInstr &MI, Register Base,
                               Register Offset, bool IsPre,
                               MachineRegisterInfo &MRI) const {
    return false;
  }

  /// Return the entry encoding for a jump table in the current function. The
  /// returned value is a member of the MachineJumpTableInfo::JTEntryKind
  /// enum.
  virtual unsigned getJumpTableEncoding() const;

  virtual const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
                            const MachineBasicBlock * /*MBB*/,
                            unsigned /*uid*/, MCContext & /*Ctx*/) const {
    llvm_unreachable("Need to implement this hook if target has custom JTIs");
  }

  /// Returns relocation base for the given PIC jumptable.
  virtual SDValue getPICJumpTableRelocBase(SDValue Table,
                                           SelectionDAG &DAG) const;

  /// This returns the relocation base for the given PIC jumptable, the same
  /// as getPICJumpTableRelocBase, but as an MCExpr.
  virtual const MCExpr *
  getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                               MCContext &Ctx) const;

  /// Return true if folding a constant offset with the given GlobalAddress
  /// is legal. It is frequently not legal in PIC relocation models.
  virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;

  bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                            SDValue &Chain) const;

  void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
                           SDValue &NewRHS, ISD::CondCode &CCCode,
                           const SDLoc &DL, const SDValue OldLHS,
                           const SDValue OldRHS) const;

  void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
                           SDValue &NewRHS, ISD::CondCode &CCCode,
                           const SDLoc &DL, const SDValue OldLHS,
                           const SDValue OldRHS, SDValue &Chain,
                           bool IsSignaling = false) const;

  /// Returns a pair of (return value, chain).
  /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
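  ///
  /// Typical use when expanding an unsupported operation to a libcall (a
  /// sketch; assumes LHS/RHS are the i128 operands being lowered):
  ///   MakeLibCallOptions CallOptions;
  ///   CallOptions.setSExt(true);
  ///   SDValue Ops[2] = {LHS, RHS};
  ///   std::pair<SDValue, SDValue> Call =
  ///       makeLibCall(DAG, RTLIB::MUL_I128, MVT::i128, Ops, CallOptions, dl);
  ///   // Call.first is the result value, Call.second the output chain.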
  std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
                                          EVT RetVT, ArrayRef<SDValue> Ops,
                                          MakeLibCallOptions CallOptions,
                                          const SDLoc &dl,
                                          SDValue Chain = SDValue()) const;

  /// Check whether parameters to a call that are passed in callee saved
  /// registers are the same as from the calling function. This needs to be
  /// checked for tail call eligibility.
  bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
                            const uint32_t *CallerPreservedMask,
                            const SmallVectorImpl<CCValAssign> &ArgLocs,
                            const SmallVectorImpl<SDValue> &OutVals) const;

  //===--------------------------------------------------------------------===//
  // TargetLowering Optimization Methods
  //

  /// A convenience struct that encapsulates a DAG, and two SDValues for
  /// returning information from TargetLowering to its clients that want to
  /// combine.
  struct TargetLoweringOpt {
    SelectionDAG &DAG;
    bool LegalTys;
    bool LegalOps;
    SDValue Old;
    SDValue New;

    explicit TargetLoweringOpt(SelectionDAG &InDAG, bool LT, bool LO)
        : DAG(InDAG), LegalTys(LT), LegalOps(LO) {}

    bool LegalTypes() const { return LegalTys; }
    bool LegalOperations() const { return LegalOps; }

    bool CombineTo(SDValue O, SDValue N) {
      Old = O;
      New = N;
      return true;
    }
  };

  /// Determines the optimal series of memory ops to replace the
  /// memset / memcpy. Return true if the number of memory ops is below the
  /// threshold (Limit). It returns the types of the sequence of memory ops
  /// to perform memset / memcpy by reference.
  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
                                const MemOp &Op, unsigned DstAS,
                                unsigned SrcAS,
                                const AttributeList &FuncAttributes) const;

  /// Check to see if the specified operand of the specified instruction is a
  /// constant integer. If so, check to see if there are any bits set in the
  /// constant that are not demanded. If so, shrink the constant and return
  /// true.
  bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              TargetLoweringOpt &TLO) const;

  /// Helper wrapper around ShrinkDemandedConstant, demanding all elements.
  bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                              TargetLoweringOpt &TLO) const;

  // Target hook to do target-specific const optimization, which is called by
  // ShrinkDemandedConstant. This function should return true if the target
  // doesn't want ShrinkDemandedConstant to further optimize the constant.
  virtual bool targetShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
    return false;
  }

  /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This
  /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
  /// generalized for targets with other types of implicit widening casts.
  bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
                        TargetLoweringOpt &TLO) const;

  /// Look at Op.
  /// At this point, we know that only the DemandedBits bits of the result of
  /// Op are ever used downstream. If we can use this information to simplify
  /// Op, create a new simplified DAG node and return true, returning the
  /// original and new nodes in Old and New. Otherwise, analyze the
  /// expression and return a mask of KnownOne and KnownZero bits for the
  /// expression (used to simplify the caller). The KnownZero/One bits may
  /// only be accurate for those bits in the Demanded masks.
  /// \p AssumeSingleUse When this parameter is true, this function will
  /// attempt to simplify \p Op even if there are multiple uses.
  /// Callers are responsible for correctly updating the DAG based on the
  /// results of this function, because simply replacing TLO.Old with TLO.New
  /// will be incorrect when this parameter is true and TLO.Old has multiple
  /// uses.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            const APInt &DemandedElts, KnownBits &Known,
                            TargetLoweringOpt &TLO, unsigned Depth = 0,
                            bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            KnownBits &Known, TargetLoweringOpt &TLO,
                            unsigned Depth = 0,
                            bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedBits.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            DAGCombinerInfo &DCI) const;

  /// More limited version of SimplifyDemandedBits that can be used to "look
  /// through" ops that don't contribute to the DemandedBits/DemandedElts -
  /// bitwise ops etc.
  SDValue SimplifyMultipleUseDemandedBits(SDValue Op,
                                          const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          SelectionDAG &DAG,
                                          unsigned Depth) const;

  /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
  /// elements.
  SDValue SimplifyMultipleUseDemandedBits(SDValue Op,
                                          const APInt &DemandedBits,
                                          SelectionDAG &DAG,
                                          unsigned Depth = 0) const;

  /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
  /// bits from only some vector elements.
  SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                SelectionDAG &DAG,
                                                unsigned Depth = 0) const;

  /// Look at Vector Op. At this point, we know that only the DemandedElts
  /// elements of the result of Op are ever used downstream. If we can use
  /// this information to simplify Op, create a new simplified DAG node and
  /// return true, storing the original and new nodes in TLO.
  /// Otherwise, analyze the expression and return a mask of KnownUndef and
  /// KnownZero elements for the expression (used to simplify the caller).
  /// The KnownUndef/Zero elements may only be accurate for those bits
  /// in the DemandedMask.
  /// \p AssumeSingleUse When this parameter is true, this function will
  /// attempt to simplify \p Op even if there are multiple uses.
  /// Callers are responsible for correctly updating the DAG based on the
  /// results of this function, because simply replacing TLO.Old with TLO.New
  /// will be incorrect when this parameter is true and TLO.Old has multiple
  /// uses.
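  ///
  /// Typical use from a target combine (a sketch; Vec and NumElts are
  /// assumed to come from the node being combined):
  ///   APInt DemandedElts = APInt::getAllOnesValue(NumElts);
  ///   APInt KnownUndef, KnownZero;
  ///   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
  ///                         !DCI.isBeforeLegalizeOps());
  ///   if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
  ///                                  KnownZero, TLO))
  ///     DCI.CommitTargetLoweringOpt(TLO);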
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
                                  APInt &KnownUndef, APInt &KnownZero,
                                  TargetLoweringOpt &TLO, unsigned Depth = 0,
                                  bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedVectorElts.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                  APInt &KnownUndef, APInt &KnownZero,
                                  DAGCombinerInfo &DCI) const;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets. The
  /// DemandedElts argument allows us to only collect the known bits that
  /// are shared by the requested vector elements.
  virtual void computeKnownBitsForTargetNode(const SDValue Op,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth = 0) const;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets. The
  /// DemandedElts argument allows us to only collect the known bits that
  /// are shared by the requested vector elements. This is for GISel.
  virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
                                              Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              const MachineRegisterInfo &MRI,
                                              unsigned Depth = 0) const;

  /// Determine the known alignment for the pointer value \p R. This can
  /// typically be inferred from the number of low known 0 bits. However,
  /// for a pointer with a non-integral address space, the alignment value
  /// may be independent from the known low bits.
  virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
                                                Register R,
                                                const MachineRegisterInfo &MRI,
                                                unsigned Depth = 0) const;

  /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
  /// Default implementation computes low bits based on alignment
  /// information. This should preserve known bits passed into it.
  virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known,
                                             const MachineFunction &MF) const;

  /// This method can be implemented by targets that want to expose
  /// additional information about sign bits to the DAG Combiner. The
  /// DemandedElts argument allows us to only collect the minimum sign bits
  /// that are shared by the requested vector elements.
  virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth = 0) const;

  /// This method can be implemented by targets that want to expose
  /// additional information about sign bits to GlobalISel combiners. The
  /// DemandedElts argument allows us to only collect the minimum sign bits
  /// that are shared by the requested vector elements.
  virtual unsigned
  computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R,
                                   const APInt &DemandedElts,
                                   const MachineRegisterInfo &MRI,
                                   unsigned Depth = 0) const;

  /// Attempt to simplify any target nodes based on the demanded vector
  /// elements, returning true on success. Otherwise, analyze the expression
  /// and return a mask of KnownUndef and KnownZero elements for the
  /// expression (used to simplify the caller).
  /// The KnownUndef/Zero elements may only be accurate for those bits in
  /// the DemandedMask.
  virtual bool SimplifyDemandedVectorEltsForTargetNode(
      SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
      APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;

  /// Attempt to simplify any target nodes based on the demanded bits/elts,
  /// returning true on success. Otherwise, analyze the
  /// expression and return a mask of KnownOne and KnownZero bits for the
  /// expression (used to simplify the caller). The KnownZero/One bits may
  /// only be accurate for those bits in the Demanded masks.
  virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                                 const APInt &DemandedBits,
                                                 const APInt &DemandedElts,
                                                 KnownBits &Known,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth = 0) const;

  /// More limited version of SimplifyDemandedBits that can be used to "look
  /// through" ops that don't contribute to the DemandedBits/DemandedElts -
  /// bitwise ops etc.
  virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const;

  /// Tries to build a legal vector shuffle using the provided parameters
  /// or equivalent variations. The Mask argument may be modified as the
  /// function tries different variations.
  /// Returns an empty SDValue if the operation fails.
  SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
                                  SDValue N1, MutableArrayRef<int> Mask,
                                  SelectionDAG &DAG) const;

  /// This method returns the constant pool value that will be loaded by LD.
  /// NOTE: You must check for implicit extensions of the constant by LD.
  virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;

  /// If \p SNaN is false, \returns true if \p Op is known to never be any
  /// NaN. If \p SNaN is true, returns if \p Op is known to never be a
  /// signaling NaN.
  virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
                                            const SelectionDAG &DAG,
                                            bool SNaN = false,
                                            unsigned Depth = 0) const;

  struct DAGCombinerInfo {
    void *DC; // The DAG Combiner object.
    CombineLevel Level;
    bool CalledByLegalizer;

  public:
    SelectionDAG &DAG;

    DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
        : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}

    bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
    bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
    bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; }
    CombineLevel getDAGCombineLevel() { return Level; }
    bool isCalledByLegalizer() const { return CalledByLegalizer; }

    void AddToWorklist(SDNode *N);
    SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true);

    bool recursivelyDeleteUnusedNodes(SDNode *N);

    void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
  };

  /// Return true if \p N is a constant or constant vector equal to the true
  /// value from getBooleanContents().
  bool isConstTrueVal(const SDNode *N) const;

  /// Return true if \p N is a constant or constant vector equal to the false
  /// value from getBooleanContents().
  bool isConstFalseVal(const SDNode *N) const;

  /// Return if \p N is a True value when extended to \p VT.
  bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;

  /// Try to simplify a setcc built with the specified operands and cc. If it
  /// is unable to simplify it, return a null SDValue.
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        bool foldBooleans, DAGCombinerInfo &DCI,
                        const SDLoc &dl) const;

  // For targets which wrap address, unwrap for analysis.
  virtual SDValue unwrapAddress(SDValue N) const { return N; }

  /// Returns true (and the GlobalValue and the offset) if the node is a
  /// GlobalAddress + offset.
  virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                              int64_t &Offset) const;

  /// This method will be invoked for all target nodes and for any
  /// target-independent nodes that the target has registered with invoke it
  /// for.
  ///
  /// The semantics are as follows:
  /// Return Value:
  ///   SDValue.Val == 0 - No change was made
  ///   SDValue.Val == N - N was replaced, is dead, and is already handled.
  ///   otherwise        - N should be replaced by the returned Operand.
  ///
  /// In addition, methods provided by DAGCombinerInfo may be used to perform
  /// more complex transformations.
  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  /// Return true if it is profitable to move this shift by a constant
  /// amount through its operand, adjusting any immediate operands as
  /// necessary to preserve semantics. This transformation may not be
  /// desirable if it disrupts a particularly auspicious target-specific
  /// tree (e.g. bitfield extraction in AArch64). By default, it returns
  /// true.
  ///
  /// @param N the shift node
  /// @param Level the current DAGCombine legalization level.
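  ///
  /// For example, returning true here lets the combiner rewrite
  ///   (shl (add x, C1), C2)  -->  (add (shl x, C2), C1 << C2)
  /// whereas a target that selects (add x, C1) << C2 into a bitfield
  /// instruction would return false to keep the tree intact.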
  virtual bool isDesirableToCommuteWithShift(const SDNode *N,
                                             CombineLevel Level) const {
    return true;
  }

  /// Return true if the target has native support for the specified value
  /// type and it is 'desirable' to use the type for the given node type.
  /// e.g. On x86 i16 is legal, but undesirable since i16 instruction
  /// encodings are longer and some i16 instructions are slow.
  virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
    // By default, assume all legal types are desirable.
    return isTypeLegal(VT);
  }

  /// Return true if it is profitable for dag combiner to transform a
  /// floating point op of specified opcode to an equivalent op of an
  /// integer type. e.g. f32 load -> i32 load can be profitable on ARM.
  virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
                                                 EVT /*VT*/) const {
    return false;
  }

  /// This method queries the target whether it is beneficial for dag
  /// combiner to promote the specified node. If true, it should return the
  /// desired promotion type by reference.
  virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT & /*PVT*/) const {
    return false;
  }

  /// Return true if the target supports the swifterror attribute. It
  /// optimizes loads and stores to reading and writing a specific register.
  virtual bool supportSwiftError() const {
    return false;
  }

  /// Return true if the target supports that a subset of CSRs for the given
  /// machine function is handled explicitly via copies.
  virtual bool supportSplitCSR(MachineFunction *MF) const {
    return false;
  }

  /// Perform necessary initialization to handle a subset of CSRs explicitly
  /// via copies. This function is called at the beginning of instruction
  /// selection.
  virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
    llvm_unreachable("Not Implemented");
  }

  /// Insert explicit copies in entry and exit blocks. We copy a subset of
  /// CSRs to virtual registers in the entry block, and copy them back to
  /// physical registers in the exit blocks. This function is called at the
  /// end of instruction selection.
  virtual void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
    llvm_unreachable("Not Implemented");
  }

  /// Return the newly negated expression if the cost is not expensive and
  /// set the cost in \p Cost to indicate that if it is cheaper or neutral
  /// to do the negation.
  virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                       bool LegalOps, bool OptForSize,
                                       NegatibleCost &Cost,
                                       unsigned Depth = 0) const;

  /// This is the helper function to return the newly negated expression
  /// only when the cost is cheaper.
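  ///
  /// For example, when lowering (fsub X, Y) a target could try (a sketch):
  ///   if (SDValue NegY = getCheaperNegatedExpression(Y, DAG, LegalOps,
  ///                                                  OptForSize))
  ///     return DAG.getNode(ISD::FADD, DL, VT, X, NegY);
  /// and fall back to the generic expansion otherwise.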
  /// This is a helper function to return the newly negated expression only
  /// when the cost is cheaper.
  SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                      bool LegalOps, bool OptForSize,
                                      unsigned Depth = 0) const {
    NegatibleCost Cost = NegatibleCost::Expensive;
    SDValue Neg =
        getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
    if (Neg && Cost == NegatibleCost::Cheaper)
      return Neg;
    // Remove the newly created node to avoid side effects on the DAG.
    if (Neg && Neg.getNode()->use_empty())
      DAG.RemoveDeadNode(Neg.getNode());
    return SDValue();
  }

  /// This is a helper function to return the newly negated expression if
  /// the cost is not expensive.
  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
                               bool OptForSize, unsigned Depth = 0) const {
    NegatibleCost Cost = NegatibleCost::Expensive;
    return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
  }

  //===--------------------------------------------------------------------===//
  // Lowering methods - These methods must be implemented by targets so that
  // the SelectionDAGBuilder code knows how to lower these.
  //

  /// Target-specific splitting of values into parts that fit a register
  /// storing a legal type.
  virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
                                           SDValue Val, SDValue *Parts,
                                           unsigned NumParts, MVT PartVT,
                                           Optional<CallingConv::ID> CC) const {
    return false;
  }

  /// Target-specific combining of register parts into their original value.
  virtual SDValue
  joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
                             const SDValue *Parts, unsigned NumParts,
                             MVT PartVT, EVT ValueVT,
                             Optional<CallingConv::ID> CC) const {
    return SDValue();
  }

  /// This hook must be implemented to lower the incoming (formal) arguments,
  /// described by the Ins array, into the specified DAG. The implementation
  /// should fill in the InVals array with legal-type argument values, and
  /// return the resulting token chain value.
  virtual SDValue LowerFormalArguments(
      SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
      const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
      SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
    llvm_unreachable("Not Implemented");
  }
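  // For illustration, a hedged sketch of an override handling register
  // arguments only. MyTargetLowering, CC_MyTarget (a TableGen-generated
  // calling-convention function), and MyTarget::GPRRegClass are all
  // hypothetical names:
  //
  //   SDValue MyTargetLowering::LowerFormalArguments(
  //       SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
  //       const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
  //       SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  //     MachineFunction &MF = DAG.getMachineFunction();
  //     SmallVector<CCValAssign, 16> ArgLocs;
  //     CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  //     CCInfo.AnalyzeFormalArguments(Ins, CC_MyTarget);
  //     for (CCValAssign &VA : ArgLocs) {
  //       assert(VA.isRegLoc() && "sketch handles register arguments only");
  //       // Copy each incoming physical register into a virtual register.
  //       Register VReg =
  //           MF.getRegInfo().createVirtualRegister(&MyTarget::GPRRegClass);
  //       MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
  //       InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, VA.getLocVT()));
  //     }
  //     return Chain;
  //   }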
  /// This structure contains all information that is necessary for lowering
  /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
  /// needs to lower a call, and targets will see this struct in their
  /// LowerCall implementation.
  struct CallLoweringInfo {
    SDValue Chain;
    Type *RetTy = nullptr;
    bool RetSExt : 1;
    bool RetZExt : 1;
    bool IsVarArg : 1;
    bool IsInReg : 1;
    bool DoesNotReturn : 1;
    bool IsReturnValueUsed : 1;
    bool IsConvergent : 1;
    bool IsPatchPoint : 1;
    bool IsPreallocated : 1;
    bool NoMerge : 1;

    // IsTailCall should be modified by implementations of
    // TargetLowering::LowerCall that perform tail call conversions.
    bool IsTailCall = false;

    // True if call lowering is done after SelectionDAG type legalization.
    bool IsPostTypeLegalization = false;

    unsigned NumFixedArgs = -1;
    CallingConv::ID CallConv = CallingConv::C;
    SDValue Callee;
    ArgListTy Args;
    SelectionDAG &DAG;
    SDLoc DL;
    const CallBase *CB = nullptr;
    SmallVector<ISD::OutputArg, 32> Outs;
    SmallVector<SDValue, 32> OutVals;
    SmallVector<ISD::InputArg, 32> Ins;
    SmallVector<SDValue, 4> InVals;

    CallLoweringInfo(SelectionDAG &DAG)
        : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
          DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
          IsPatchPoint(false), IsPreallocated(false), NoMerge(false),
          DAG(DAG) {}

    CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
      DL = dl;
      return *this;
    }

    CallLoweringInfo &setChain(SDValue InChain) {
      Chain = InChain;
      return *this;
    }

    // setCallee with target/module-specific attributes.
    CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType,
                                   SDValue Target, ArgListTy &&ArgsList) {
      RetTy = ResultType;
      Callee = Target;
      CallConv = CC;
      NumFixedArgs = ArgsList.size();
      Args = std::move(ArgsList);

      DAG.getTargetLoweringInfo().markLibCallAttributes(
          &(DAG.getMachineFunction()), CC, Args);
      return *this;
    }

    CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType,
                                SDValue Target, ArgListTy &&ArgsList) {
      RetTy = ResultType;
      Callee = Target;
      CallConv = CC;
      NumFixedArgs = ArgsList.size();
      Args = std::move(ArgsList);
      return *this;
    }

    CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
                                SDValue Target, ArgListTy &&ArgsList,
                                const CallBase &Call) {
      RetTy = ResultType;

      IsInReg = Call.hasRetAttr(Attribute::InReg);
      DoesNotReturn =
          Call.doesNotReturn() ||
          (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode()));
      IsVarArg = FTy->isVarArg();
      IsReturnValueUsed = !Call.use_empty();
      RetSExt = Call.hasRetAttr(Attribute::SExt);
      RetZExt = Call.hasRetAttr(Attribute::ZExt);
      NoMerge = Call.hasFnAttr(Attribute::NoMerge);

      Callee = Target;

      CallConv = Call.getCallingConv();
      NumFixedArgs = FTy->getNumParams();
      Args = std::move(ArgsList);

      CB = &Call;

      return *this;
    }

    CallLoweringInfo &setInRegister(bool Value = true) {
      IsInReg = Value;
      return *this;
    }

    CallLoweringInfo &setNoReturn(bool Value = true) {
      DoesNotReturn = Value;
      return *this;
    }

    CallLoweringInfo &setVarArg(bool Value = true) {
      IsVarArg = Value;
      return *this;
    }

    CallLoweringInfo &setTailCall(bool Value = true) {
      IsTailCall = Value;
      return *this;
    }
    CallLoweringInfo &setDiscardResult(bool Value = true) {
      IsReturnValueUsed = !Value;
      return *this;
    }

    CallLoweringInfo &setConvergent(bool Value = true) {
      IsConvergent = Value;
      return *this;
    }

    CallLoweringInfo &setSExtResult(bool Value = true) {
      RetSExt = Value;
      return *this;
    }

    CallLoweringInfo &setZExtResult(bool Value = true) {
      RetZExt = Value;
      return *this;
    }

    CallLoweringInfo &setIsPatchPoint(bool Value = true) {
      IsPatchPoint = Value;
      return *this;
    }

    CallLoweringInfo &setIsPreallocated(bool Value = true) {
      IsPreallocated = Value;
      return *this;
    }

    CallLoweringInfo &setIsPostTypeLegalization(bool Value = true) {
      IsPostTypeLegalization = Value;
      return *this;
    }

    ArgListTy &getArgs() {
      return Args;
    }
  };

  /// This structure is used to pass arguments to the makeLibCall function.
  struct MakeLibCallOptions {
    // By passing the type list before softening to makeLibCall, the target
    // hook shouldExtendTypeInLibCall can get the original type before
    // softening.
    ArrayRef<EVT> OpsVTBeforeSoften;
    EVT RetVTBeforeSoften;
    bool IsSExt : 1;
    bool DoesNotReturn : 1;
    bool IsReturnValueUsed : 1;
    bool IsPostTypeLegalization : 1;
    bool IsSoften : 1;

    MakeLibCallOptions()
        : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
          IsPostTypeLegalization(false), IsSoften(false) {}

    MakeLibCallOptions &setSExt(bool Value = true) {
      IsSExt = Value;
      return *this;
    }

    MakeLibCallOptions &setNoReturn(bool Value = true) {
      DoesNotReturn = Value;
      return *this;
    }

    MakeLibCallOptions &setDiscardResult(bool Value = true) {
      IsReturnValueUsed = !Value;
      return *this;
    }

    MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
      IsPostTypeLegalization = Value;
      return *this;
    }

    MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
                                                bool Value = true) {
      OpsVTBeforeSoften = OpsVT;
      RetVTBeforeSoften = RetVT;
      IsSoften = Value;
      return *this;
    }
  };

  /// This function lowers an abstract call to a function into an actual call.
  /// This returns a pair of operands. The first element is the return value
  /// for the function (if RetTy is not VoidTy). The second element is the
  /// outgoing token chain. It calls LowerCall to do the actual lowering.
  std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;

  /// This hook must be implemented to lower calls into the specified
  /// DAG. The outgoing arguments to the call are described by the Outs array,
  /// and the values to be returned by the call are described by the Ins
  /// array. The implementation should fill in the InVals array with legal-type
  /// return values from the call, and return the resulting token chain value.
  virtual SDValue
  LowerCall(CallLoweringInfo &/*CLI*/,
            SmallVectorImpl<SDValue> &/*InVals*/) const {
    llvm_unreachable("Not Implemented");
  }
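  // For illustration, a sketch of how a caller typically builds up a
  // CallLoweringInfo with the chained setters and hands it to LowerCallTo.
  // Chain, dl, RetTy, Callee, and Args stand in for values the caller
  // already has:
  //
  //   TargetLowering::CallLoweringInfo CLI(DAG);
  //   CLI.setDebugLoc(dl)
  //       .setChain(Chain)
  //       .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
  //   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  //   // CallResult.first is the return value, CallResult.second the chain.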
  /// Target-specific cleanup for formal ByVal parameters.
  virtual void HandleByVal(CCState *, unsigned &, Align) const {}

  /// This hook should be implemented to check whether the return values
  /// described by the Outs array can fit into the return registers. If false
  /// is returned, an sret-demotion is performed.
  virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
                              MachineFunction &/*MF*/, bool /*isVarArg*/,
                              const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
                              LLVMContext &/*Context*/) const {
    // Return true by default to get preexisting behavior.
    return true;
  }

  /// This hook must be implemented to lower outgoing return values, described
  /// by the Outs array, into the specified DAG. The implementation should
  /// return the resulting token chain value.
  virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
                              bool /*isVarArg*/,
                              const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
                              const SmallVectorImpl<SDValue> & /*OutVals*/,
                              const SDLoc & /*dl*/,
                              SelectionDAG & /*DAG*/) const {
    llvm_unreachable("Not Implemented");
  }

  /// Return true if the result of the specified node is used by a return node
  /// only. It also computes and returns the input chain for the tail call.
  ///
  /// This is used to determine whether it is possible to codegen a libcall as
  /// a tail call at legalization time.
  virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
    return false;
  }

  /// Return true if the target may be able to emit the call instruction as a
  /// tail call. This is used by optimization passes to determine if it's
  /// profitable to duplicate return instructions to enable tailcall
  /// optimization.
  virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
    return false;
  }

  /// Return the builtin name for the __builtin___clear_cache intrinsic.
  /// The default is to invoke the clear cache library call.
  virtual const char *getClearCacheBuiltinName() const {
    return "__clear_cache";
  }

  /// Return the register ID of the name passed in. Used by the named register
  /// global variables extension. There is no target-independent behavior, so
  /// the default action is to bail.
  virtual Register getRegisterByName(const char* RegName, LLT Ty,
                                     const MachineFunction &MF) const {
    report_fatal_error("Named registers not implemented for this target");
  }
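  // For illustration, a hedged sketch of an override for a hypothetical
  // target whose stack pointer register is MyTarget::SP (the MyTarget names
  // are assumptions, and StringSwitch comes from llvm/ADT/StringSwitch.h):
  //
  //   Register MyTargetLowering::getRegisterByName(
  //       const char *RegName, LLT Ty, const MachineFunction &MF) const {
  //     Register Reg = StringSwitch<Register>(RegName)
  //                        .Case("sp", MyTarget::SP)
  //                        .Default(Register());
  //     if (Reg)
  //       return Reg;
  //     report_fatal_error("Invalid register name global variable");
  //   }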
  /// Return the type that should be used to zero or sign extend a
  /// zeroext/signext integer return value. FIXME: Some C calling conventions
  /// require the return type to be promoted, but this is not true all the
  /// time, e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C
  /// calling conventions. The frontend should handle this and include all of
  /// the necessary information.
  virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                                  ISD::NodeType /*ExtendKind*/) const {
    EVT MinVT = getRegisterType(Context, MVT::i32);
    return VT.bitsLT(MinVT) ? MinVT : VT;
  }

  /// For some targets, an LLVM struct type must be broken down into multiple
  /// simple types, but the calling convention specifies that the entire struct
  /// must be passed in a block of consecutive registers.
  virtual bool
  functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
                                            bool isVarArg) const {
    return false;
  }

  /// For most targets, an LLVM type must be broken down into multiple
  /// smaller types. Usually the halves are ordered according to the
  /// endianness, but for some platforms that would break. So this method
  /// defaults to matching the endianness but can be overridden.
  virtual bool
  shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
    return DL.isLittleEndian();
  }

  /// Returns a 0-terminated array of registers that can be safely used as
  /// scratch registers.
  virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
    return nullptr;
  }

  /// This callback is used to prepare for a volatile or atomic load.
  /// It takes a chain node as input and returns the chain for the load itself.
  ///
  /// Having a callback like this is necessary for targets like SystemZ,
  /// which allows a CPU to reuse the result of a previous load indefinitely,
  /// even if a cache-coherent store is performed by another CPU. The default
  /// implementation does nothing.
  virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
                                              SelectionDAG &DAG) const {
    return Chain;
  }

  /// Should SelectionDAG lower an atomic store of the given kind as a normal
  /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
  /// eventually migrate all targets to using StoreSDNodes, but porting is
  /// being done one target at a time.
  virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
    assert(SI.isAtomic() && "violated precondition");
    return false;
  }

  /// Should SelectionDAG lower an atomic load of the given kind as a normal
  /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
  /// eventually migrate all targets to using LoadSDNodes, but porting is
  /// being done one target at a time.
  virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
    assert(LI.isAtomic() && "violated precondition");
    return false;
  }
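  // For illustration, a hedged sketch of how a target that has completed the
  // porting described above might opt in (MyTargetLowering is hypothetical,
  // and whether this is safe depends entirely on the target's memory model):
  //
  //   bool MyTargetLowering::lowerAtomicLoadAsLoadSDNode(
  //       const LoadInst &LI) const {
  //     // On this hypothetical target every atomic load is satisfied by an
  //     // ordinary load instruction, so use a plain LoadSDNode.
  //     return true;
  //   }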
  /// This callback is invoked by the type legalizer to legalize nodes with an
  /// illegal operand type but legal result types. It replaces the
  /// LowerOperation callback in the type legalizer. The reason we can not do
  /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
  /// use this callback.
  ///
  /// TODO: Consider merging with ReplaceNodeResults.
  ///
  /// The target places new result values for the node in Results (their
  /// number and types must exactly match those of the original return values
  /// of the node), or leaves Results empty, which indicates that the node is
  /// not to be custom lowered after all.
  /// The default implementation calls LowerOperation.
  virtual void LowerOperationWrapper(SDNode *N,
                                     SmallVectorImpl<SDValue> &Results,
                                     SelectionDAG &DAG) const;

  /// This callback is invoked for operations that are unsupported by the
  /// target, which are registered to use 'custom' lowering, and whose defined
  /// values are all legal. If the target has no operations that require
  /// custom lowering, it need not implement this. The default implementation
  /// of this aborts.
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

  /// This callback is invoked when a node result type is illegal for the
  /// target, and the operation was registered to use 'custom' lowering for
  /// that result type. The target places new result values for the node in
  /// Results (their number and types must exactly match those of the original
  /// return values of the node), or leaves Results empty, which indicates
  /// that the node is not to be custom lowered after all.
  ///
  /// If the target has no operations that require custom lowering, it need
  /// not implement this. The default implementation aborts.
  virtual void ReplaceNodeResults(SDNode * /*N*/,
                                  SmallVectorImpl<SDValue> &/*Results*/,
                                  SelectionDAG &/*DAG*/) const {
    llvm_unreachable("ReplaceNodeResults not implemented for this target!");
  }

  /// This method returns the name of a target specific DAG node.
  virtual const char *getTargetNodeName(unsigned Opcode) const;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  virtual FastISel *createFastISel(FunctionLoweringInfo &,
                                   const TargetLibraryInfo *) const {
    return nullptr;
  }

  bool verifyReturnAddressArgumentIsConstant(SDValue Op,
                                             SelectionDAG &DAG) const;

  //===--------------------------------------------------------------------===//
  // Inline Asm Support hooks
  //

  /// This hook allows the target to expand an inline asm call to be explicit
  /// llvm code if it wants to. This is useful for turning simple inline asms
  /// into LLVM intrinsics, which gives the compiler more information about the
  /// behavior of the code.
  virtual bool ExpandInlineAsm(CallInst *) const {
    return false;
  }

  enum ConstraintType {
    C_Register,            // Constraint represents specific register(s).
    C_RegisterClass,       // Constraint represents any of register(s) in class.
    C_Memory,              // Memory constraint.
    C_Immediate,           // Requires an immediate.
    C_Other,               // Something else.
    C_Unknown              // Unsupported constraint.
  };

  enum ConstraintWeight {
    // Generic weights.
    CW_Invalid  = -1,      // No match.
    CW_Okay     = 0,       // Acceptable.
    CW_Good     = 1,       // Good weight.
    CW_Better   = 2,       // Better weight.
    CW_Best     = 3,       // Best weight.

    // Well-known weights.
    CW_SpecificReg = CW_Okay,   // Specific register operands.
    CW_Register    = CW_Good,   // Register operands.
    CW_Memory      = CW_Better, // Memory operands.
    CW_Constant    = CW_Best,   // Constant operand.
    CW_Default     = CW_Okay    // Default or don't know type.
  };

  /// This contains information for each constraint that we are lowering.
  struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
    /// This contains the actual string for the code, like "m". TargetLowering
    /// picks the 'best' code from ConstraintInfo::Codes that most closely
    /// matches the operand.
    std::string ConstraintCode;

    /// Information about the constraint code, e.g. Register, RegisterClass,
    /// Memory, Other, Unknown.
    TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;

    /// If this is the result output operand or a clobber, this is null,
    /// otherwise it is the incoming operand to the CallInst. This gets
    /// modified as the asm is processed.
    Value *CallOperandVal = nullptr;

    /// The ValueType for the operand value.
    MVT ConstraintVT = MVT::Other;

    /// Copy constructor for copying from a ConstraintInfo.
    AsmOperandInfo(InlineAsm::ConstraintInfo Info)
        : InlineAsm::ConstraintInfo(std::move(Info)) {}

    /// Return true if this is an input operand that is a matching constraint
    /// like "4".
    bool isMatchingInputConstraint() const;

    /// If this is an input matching constraint, this method returns the output
    /// operand it matches.
    unsigned getMatchedOperand() const;
  };

  using AsmOperandInfoVector = std::vector<AsmOperandInfo>;

  /// Split up the constraint string from the inline assembly value into the
  /// specific constraints and their prefixes, and also tie in the associated
  /// operand values. If this returns an empty vector, and if the constraint
  /// string itself isn't empty, there was an error parsing.
  virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
                                                const TargetRegisterInfo *TRI,
                                                const CallBase &Call) const;

  /// Examine constraint type and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  virtual ConstraintWeight getMultipleConstraintMatchWeight(
      AsmOperandInfo &info, int maIndex) const;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  virtual ConstraintWeight getSingleConstraintMatchWeight(
      AsmOperandInfo &info, const char *constraint) const;

  /// Determines the constraint code and constraint type to use for the
  /// specific AsmOperandInfo, setting OpInfo.ConstraintCode and
  /// OpInfo.ConstraintType. If the actual operand being passed in is
  /// available, it can be passed in as Op, otherwise an empty SDValue can be
  /// passed.
  virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                      SDValue Op,
                                      SelectionDAG *DAG = nullptr) const;

  /// Given a constraint, return the type of constraint it is for this target.
  virtual ConstraintType getConstraintType(StringRef Constraint) const;
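  // For illustration, a hedged sketch of a typical override: classify the
  // single-letter constraints a hypothetical target cares about and defer the
  // rest to the base class (MyTargetLowering is a hypothetical name):
  //
  //   TargetLowering::ConstraintType
  //   MyTargetLowering::getConstraintType(StringRef Constraint) const {
  //     if (Constraint.size() == 1) {
  //       switch (Constraint[0]) {
  //       case 'r':
  //         return C_RegisterClass; // Any general-purpose register.
  //       case 'm':
  //         return C_Memory;        // A memory operand.
  //       default:
  //         break;
  //       }
  //     }
  //     return TargetLowering::getConstraintType(Constraint);
  //   }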
  /// Given a physical register constraint (e.g. {edx}), return the register
  /// number and the register class for the register.
  ///
  /// Given a register class constraint, like 'r', if this corresponds directly
  /// to an LLVM register class, return a register of 0 and the register class
  /// pointer.
  ///
  /// This should only be used for C_Register constraints. On error, this
  /// returns a register number of 0 and a null register class pointer.
  virtual std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const;

  virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const {
    if (ConstraintCode == "m")
      return InlineAsm::Constraint_m;
    return InlineAsm::Constraint_Unknown;
  }

  /// Try to replace an X constraint, which matches anything, with another that
  /// has more specific requirements based on the type of the corresponding
  /// operand. This returns null if there is no replacement to make.
  virtual const char *LowerXConstraint(EVT ConstraintVT) const;

  /// Lower the specified operand into the Ops vector. If it is invalid, don't
  /// add anything to Ops.
  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                            std::vector<SDValue> &Ops,
                                            SelectionDAG &DAG) const;

  // Lower custom output constraints. If invalid, return SDValue().
  virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                              SDLoc DL,
                                              const AsmOperandInfo &OpInfo,
                                              SelectionDAG &DAG) const;

  //===--------------------------------------------------------------------===//
  // Div utility functions
  //

  SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
                    SmallVectorImpl<SDNode *> &Created) const;
  SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
                    SmallVectorImpl<SDNode *> &Created) const;

  /// Targets may override this function to provide custom SDIV lowering for
  /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
  /// assumes SDIV is expensive and replaces it with a series of other integer
  /// operations.
  virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                SelectionDAG &DAG,
                                SmallVectorImpl<SDNode *> &Created) const;

  /// Indicate whether this target prefers to combine FDIVs with the same
  /// divisor. If the transform should never be done, return zero. If the
  /// transform should be done, return the minimum number of divisor uses
  /// that must exist.
  virtual unsigned combineRepeatedFPDivisors() const {
    return 0;
  }
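  // For illustration, a hedged sketch for a hypothetical target on which one
  // fdiv plus N fmuls is faster than N fdivs whenever a divisor is reused:
  //
  //   unsigned MyTargetLowering::combineRepeatedFPDivisors() const {
  //     return 2; // Combine once a divisor has at least two uses.
  //   }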
  /// Hooks for building estimates in place of slower divisions and square
  /// roots.

  /// Return either a square root or its reciprocal estimate value for the
  /// input operand.
  /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified'
  /// or 'Enabled' as set by a potential default override attribute.
  /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
  /// refinement iterations required to generate a sufficient (though not
  /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
  /// The boolean UseOneConstNR output is used to select a Newton-Raphson
  /// algorithm implementation that uses either one or two constants.
  /// The boolean Reciprocal is used to select whether the estimate is for the
  /// square root of the input operand or the reciprocal of its square root.
  /// A target may choose to implement its own refinement within this function.
  /// If that's true, then return '0' as the number of RefinementSteps to avoid
  /// any further refinement of the estimate.
  /// An empty SDValue return means no estimate sequence can be created.
  virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                  int Enabled, int &RefinementSteps,
                                  bool &UseOneConstNR, bool Reciprocal) const {
    return SDValue();
  }

  /// Return a reciprocal estimate value for the input operand.
  /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified'
  /// or 'Enabled' as set by a potential default override attribute.
  /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
  /// refinement iterations required to generate a sufficient (though not
  /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
  /// A target may choose to implement its own refinement within this function.
  /// If that's true, then return '0' as the number of RefinementSteps to avoid
  /// any further refinement of the estimate.
  /// An empty SDValue return means no estimate sequence can be created.
  virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                   int Enabled, int &RefinementSteps) const {
    return SDValue();
  }

  //===--------------------------------------------------------------------===//
  // Legalization utility functions
  //

  /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
  /// respectively, each computing an n/2-bit part of the result.
  /// \param Result A vector that will be filled with the parts of the result
  ///        in little-endian order.
  /// \param LL Low bits of the LHS of the MUL. You can use this parameter
  ///        if you want to control how low bits are extracted from the LHS.
  /// \param LH High bits of the LHS of the MUL. See LL for meaning.
  /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
  /// \param RH High bits of the RHS of the MUL. See LL for meaning.
  /// \returns true if the node has been expanded, false if it has not.
  bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS,
                      SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
                      SelectionDAG &DAG, MulExpansionKind Kind,
                      SDValue LL = SDValue(), SDValue LH = SDValue(),
                      SDValue RL = SDValue(), SDValue RH = SDValue()) const;

  /// Expand a MUL into two nodes. One that computes the high bits of
  /// the result and one that computes the low bits.
  /// \param HiLoVT The value type to use for the Lo and Hi nodes.
  /// \param LL Low bits of the LHS of the MUL. You can use this parameter
  ///        if you want to control how low bits are extracted from the LHS.
  /// \param LH High bits of the LHS of the MUL. See LL for meaning.
  /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
  /// \param RH High bits of the RHS of the MUL. See LL for meaning.
  /// \returns true if the node has been expanded, false if it has not.
  bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                 SelectionDAG &DAG, MulExpansionKind Kind,
                 SDValue LL = SDValue(), SDValue LH = SDValue(),
                 SDValue RL = SDValue(), SDValue RH = SDValue()) const;

  /// Expand funnel shift.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand rotations.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand float(f32) to SINT(i64) conversion.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand float to UINT conversion.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \param Chain output chain after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
                        SelectionDAG &DAG) const;

  /// Expand UINT(i64) to double(f64) conversion.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \param Chain output chain after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
                        SelectionDAG &DAG) const;

  /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted
  /// inputs.
  SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;

  /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
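  // For illustration, a hedged sketch of how a target's custom lowering might
  // reuse one of these expansion helpers instead of open-coding the sequence
  // (MyTargetLowering is a hypothetical class name):
  //
  //   SDValue MyTargetLowering::LowerOperation(SDValue Op,
  //                                            SelectionDAG &DAG) const {
  //     if (Op.getOpcode() == ISD::CTPOP) {
  //       SDValue Result;
  //       // Fall back to the generic bit-twiddling expansion of CTPOP.
  //       if (expandCTPOP(Op.getNode(), Result, DAG))
  //         return Result;
  //     }
  //     return SDValue();
  //   }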
  /// Expand ABS nodes. Expands vector/scalar ABS nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Turn a load of a vector type into a load of the individual elements.
  /// \param LD load to expand
  /// \returns BUILD_VECTOR and TokenFactor nodes.
  std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
                                                  SelectionDAG &DAG) const;

  /// Turn a store of a vector type into stores of the individual elements.
  /// \param ST Store with a vector value type
  /// \returns TokenFactor of the individual store chains.
  SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Expands an unaligned load to 2 half-size loads for an integer, and
  /// possibly more for vectors.
  std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
                                                  SelectionDAG &DAG) const;

  SDValue unalignedLoadStoreCSetbounds(const char *loadOrStore, SDValue Ptr,
                                       const SDLoc &DL, unsigned CapSize,
                                       SelectionDAG &DAG) const;

  /// Expands an unaligned store to 2 half-size stores for integer values, and
  /// possibly more for vectors.
  SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Increments memory address \p Addr according to the type of the value
  /// \p DataVT that should be stored. If the data is stored in compressed
  /// form, the memory address should be incremented according to the number
  /// of the stored elements. This number is equal to the number of '1' bits
  /// in the \p Mask.
  /// \p DataVT is a vector type. \p Mask is a vector value.
  /// \p DataVT and \p Mask have the same number of vector elements.
  SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
                                 EVT DataVT, SelectionDAG &DAG,
                                 bool IsCompressedMemory) const;

  /// Get a pointer to vector element \p Idx located in memory for a vector of
  /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out
  /// of bounds the returned pointer is unspecified, but will be within the
  /// vector bounds.
  SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                                  SDValue Index) const;

  /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
  /// method accepts integers as its arguments.
  SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
  /// method accepts integers as its arguments.
  SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
  /// method accepts integers as its arguments.
  /// Note: This method may fail if the division could not be performed
  /// within the type. Clients must retry with a wider type if this happens.
  SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                              SDValue LHS, SDValue RHS,
                              unsigned Scale, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
  void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                      SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
  void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                      SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
  /// expansion was successful and populates the Result and Overflow arguments.
  bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                  SelectionDAG &DAG) const;

  /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
  /// only the first Count elements of the vector are used.
  SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

  /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
  /// Returns true if the expansion was successful.
  bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;

  //===--------------------------------------------------------------------===//
  // Instruction Emitting Hooks
  //

  /// This method should be implemented by targets that mark instructions with
  /// the 'usesCustomInserter' flag. These instructions are special in various
  /// ways, which require special support to insert. The specified MachineInstr
  /// is created but not inserted into any basic blocks, and this method is
  /// called to expand it into a sequence of instructions, potentially also
  /// creating new basic blocks and control flow.
  /// As long as the returned basic block is different (i.e., we created a new
  /// one), the custom inserter is free to modify the rest of \p MBB.
  virtual MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

  /// This method should be implemented by targets that mark instructions with
  /// the 'hasPostISelHook' flag. These instructions must be adjusted after
  /// instruction selection by target hooks. e.g. To fill in optional defs for
  /// ARM 's' setting instructions.
  virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                             SDNode *Node) const;

  /// If this function returns true, SelectionDAGBuilder emits a
  /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
  virtual bool useLoadStackGuardNode() const {
    return false;
  }

  virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                      const SDLoc &DL) const {
    llvm_unreachable("not implemented for this target");
  }

  /// Lower TLS global address SDNode for target independent emulated TLS
  /// model.
  virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                          SelectionDAG &DAG) const;
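  // For illustration, a hedged sketch of the EmitInstrWithCustomInserter hook
  // above: expand a pseudo-instruction in place without creating new control
  // flow. MyTargetLowering and the MyTarget::{PSEUDO_SELECT,CMOV} opcodes are
  // hypothetical names:
  //
  //   MachineBasicBlock *MyTargetLowering::EmitInstrWithCustomInserter(
  //       MachineInstr &MI, MachineBasicBlock *MBB) const {
  //     const TargetInstrInfo &TII =
  //         *MBB->getParent()->getSubtarget().getInstrInfo();
  //     assert(MI.getOpcode() == MyTarget::PSEUDO_SELECT &&
  //            "unexpected custom-inserter instruction");
  //     // Replace the pseudo with a single real instruction.
  //     BuildMI(*MBB, MI, MI.getDebugLoc(), TII.get(MyTarget::CMOV))
  //         .add(MI.getOperand(0))
  //         .add(MI.getOperand(1))
  //         .add(MI.getOperand(2));
  //     MI.eraseFromParent(); // The pseudo is gone now.
  //     return MBB;
  //   }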
  /// Expands a target-specific indirect branch for the case of jump table
  /// expansion.
  virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                         SDValue Addr,
                                         SelectionDAG &DAG) const {
    return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr);
  }

  // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
  // If we're comparing for equality to zero and isCtlzFast is true, expose the
  // fact that this can be implemented as a ctlz/srl pair, so that the dag
  // combiner can fold the new nodes.
  SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;

private:
  SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, DAGCombinerInfo &DCI) const;
  SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                             ISD::CondCode Cond, const SDLoc &DL,
                             DAGCombinerInfo &DCI) const;

  SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
                                               SDValue N1, ISD::CondCode Cond,
                                               DAGCombinerInfo &DCI,
                                               const SDLoc &DL) const;

  // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
  SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
      EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
      DAGCombinerInfo &DCI, const SDLoc &DL) const;

  SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                            SDValue CompTargetNode, ISD::CondCode Cond,
                            DAGCombinerInfo &DCI, const SDLoc &DL,
                            SmallVectorImpl<SDNode *> &Created) const;
  SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                          ISD::CondCode Cond, DAGCombinerInfo &DCI,
                          const SDLoc &DL) const;

  SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                            SDValue CompTargetNode, ISD::CondCode Cond,
                            DAGCombinerInfo &DCI, const SDLoc &DL,
                            SmallVectorImpl<SDNode *> &Created) const;
  SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                          ISD::CondCode Cond, DAGCombinerInfo &DCI,
                          const SDLoc &DL) const;
};

/// Given an LLVM IR type and return type attributes, compute the return value
/// EVTs and flags, and optionally also the offsets, if the return value is
/// being lowered to memory.
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
                   SmallVectorImpl<ISD::OutputArg> &Outs,
                   const TargetLowering &TLI, const DataLayout &DL);

} // end namespace llvm

#endif // LLVM_CODEGEN_TARGETLOWERING_H