//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information. In particular
      /// the operands of these nodes are:
      ///
      ///     #0 - The incoming token chain
      ///     #1 - The callee
      ///     #2 - The number of arg bytes the caller pushes on the stack.
      ///     #3 - The number of arg bytes the callee pops off the stack.
      ///     #4 - The value to pass in AL/AX/EAX (optional)
      ///     #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///     #0 - The outgoing token chain
      ///     #1 - The first register result value (optional)
      ///     #2 - The second register result value (optional)
      ///
      CALL,

      /// Same as CALL except it adds the NoTrack prefix.
      NT_CALL,

      /// This operation implements the lowering for readcyclecounter.
      RDTSC_DAG,

      /// X86 Read Time-Stamp Counter and Processor ID.
      RDTSCP_DAG,

      /// X86 Read Performance Monitoring Counters.
      RDPMC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select
      SELECTS,

      // Same as SETCC except it's materialized with an SBB and the value is
      // all ones or all zeros.
      SETCC_CARRY, // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
      FSETCC,

      /// X86 FP SETCC, similar to above, but with output as an i1 mask and
      /// with optional rounding mode.
      FSETCCM, FSETCCM_RND,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is
      /// the flag operand produced by a CMP or TEST instruction. It also
      /// writes a flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch to if the condition is true, operand 2 is
      /// the condition code, and operand 3 is the flag operand produced by a
      /// CMP or TEST instruction.
      BRCOND,

      /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
      /// operand 1 is the target address.
      NT_BRIND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Return from interrupt. Operand 0 is the number of bytes to pop.
      IRET,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool, TargetJumpTable,
      /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
      /// MCSymbol and TargetBlockAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of an MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of an MMX vector
      /// and zeroes out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRW.
      PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Compute Sum of Absolute Differences.
      PSADBW,
      /// Compute Double Block Packed Sum-Absolute-Differences.
      DBPSADBW,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Dynamic (non-constant condition) vector blend where only the sign
      /// bits of the condition elements are used. This is used to enforce
      /// that the condition mask is not valid for generic VSELECT
      /// optimizations. This can also be used to implement the intrinsics.
      BLENDV,

      /// Combined add and sub on an FP vector.
      ADDSUB,

      // FP vector ops with rounding mode.
      FADD_RND, FADDS_RND,
      FSUB_RND, FSUBS_RND,
      FMUL_RND, FMULS_RND,
      FDIV_RND, FDIVS_RND,
      FMAX_RND, FMAXS_RND,
      FMIN_RND, FMINS_RND,
      FSQRT_RND, FSQRTS_RND,

      // FP vector get exponent.
      FGETEXP_RND, FGETEXPS_RND,
      // Extract Normalized Mantissas.
      VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
      // FP Scale.
      SCALEF,
      SCALEFS,

      // Unsigned Integer average.
      AVG,

      /// Integer horizontal add/sub.
      HADD,
      HSUB,

      /// Floating point horizontal add/sub.
      FHADD,
      FHSUB,

      // Detect Conflicts Within a Vector.
      CONFLICT,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Scalar intrinsic floating point max and min.
      FMAXS, FMINS,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // AVX-512 reciprocal approximations with a little more precision.
      RSQRT14, RSQRT14S, RCP14, RCP14S,

      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage. A call to an OS-provided thunk at the address
      // from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      // SjLj exception handling dispatch.
      EH_SJLJ_SETUP_DISPATCH,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer truncate.
      VTRUNC,
      // Vector integer truncate with unsigned/signed saturation.
      VTRUNCUS, VTRUNCS,

      // Masked version of the above. Used when less than a 128-bit result is
      // produced since the mask only applies to the lower elements and can't
      // be represented by a select.
      // SRC, PASSTHRU, MASK
      VMTRUNC, VMTRUNCUS, VMTRUNCS,

      // Vector FP extend.
      VFPEXT, VFPEXT_RND, VFPEXTS_RND,

      // Vector FP round.
      VFPROUND, VFPROUND_RND, VFPROUNDS_RND,

      // Masked version of above. Used for v2f64->v4f32.
      // SRC, PASSTHRU, MASK
      VMFPROUND,

      // 128-bit vector logical left / right shift.
      VSHLDQ, VSRLDQ,

      // Vector shift elements.
      VSHL, VSRL, VSRA,

      // Vector variable shift.
      VSHLV, VSRLV, VSRAV,

      // Vector shift elements by immediate.
      VSHLI, VSRLI, VSRAI,

      // Shifts of mask registers.
      KSHIFTL, KSHIFTR,

      // Bit rotate by immediate.
      VROTLI, VROTRI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,

      // v8i16 Horizontal minimum and position.
      PHMINPOS,

      MULTISHIFT,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      // Vector comparison with rounding mode for FP values.
      CMPM_RND,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL, UMUL,
      OR, XOR, AND,

      // Bit field extract.
      BEXTR,

      // Zero High Bits Starting with Specified Bit Position.
      BZHI,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector sign bit extraction.
      MOVMSK,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // OR/AND test for masks.
      KORTEST,
      KTEST,

      // ADD for masks.
      KADD,

      // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      // VBMI2 Concat & Shift.
      VSHLD,
      VSHRD,
      VSHLDV,
      VSHRDV,
      // Shuffle Packed Values at 128-bit granularity.
      SHUF128,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVHLPS,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMI,
      VPERM2X128,

      // Variable Permute (VPERM).
      // Res = VPERMV MaskV, V0
      VPERMV,

      // 3-op Variable Permute (VPERMT2).
      // Res = VPERMV3 V0, MaskV, V1
      VPERMV3,

      // Bitwise ternary logic.
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values.
      VFIXUPIMM,
      VFIXUPIMMS,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
      VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
      VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
      // RndScale - Round FP Values To Include A Given Number Of Fraction
      // Bits. Also used by the legacy (V)ROUND intrinsics where we mask out
      // the scaling part of the immediate.
      VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
      // Tests the types of FP values, for packed types.
      VFPCLASS,
      // Tests the types of FP values, for scalar types.
      VFPCLASSS,

      // Broadcast scalar to vector.
      VBROADCAST,
      // Broadcast mask to vector.
      VBROADCASTM,
      // Broadcast subvector to vector.
      SUBV_BROADCAST,

      /// SSE4A Extraction and Insertion.
      EXTRQI, INSERTQI,

      // XOP arithmetic/logical shifts.
      VPSHA, VPSHL,
      // XOP signed/unsigned integer comparisons.
      VPCOM, VPCOMU,
      // XOP packed permute bytes.
      VPPERM,
      // XOP two source permutation.
      VPERMIL2,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,
      // Vector Multiply Packed Unsigned Integers with Round and Scale.
      MULHRS,

      // Multiply and Add Packed Integers.
      VPMADDUBSW, VPMADDWD,

      // AVX512IFMA multiply and add.
      // NOTE: These are different from the instructions and perform
      // op0 x op1 + op2.
      VPMADD52L, VPMADD52H,

      // VNNI
      VPDPBUSD,
      VPDPBUSDS,
      VPDPWSSD,
      VPDPWSSDS,

      // FMA nodes.
      // We use the target independent ISD::FMA for the non-inverted case.
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Bits shuffle.
      VPSHUFBITQMB,

      // Convert Unsigned/Signed Integer to Floating-Point Value with
      // rounding mode.
      SINT_TO_FP_RND, UINT_TO_FP_RND,
      SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

      // Vector float/double to signed/unsigned integer.
      CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
      // Scalar float/double to signed/unsigned integer.
      CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,

      // Vector float/double to signed/unsigned integer with truncation.
      CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
      // Scalar float/double to signed/unsigned integer with truncation.
      CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,

      // Vector signed/unsigned integer to float/double.
      CVTSI2P, CVTUI2P,

      // Masked versions of above. Used for v2f64->v4f32.
      // SRC, PASSTHRU, MASK
      MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,

      // Save xmm argument registers to the stack, according to %al. An
      // operator is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows' _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks if the current stacklet has enough space,
      // and falls back to heap allocation if not.
      SEG_ALLOCA,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      // SSE42 string comparisons.
      // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
      // will emit one or two instructions based on which results are used. If
      // both flags and index/mask are used, this allows us to use a single
      // instruction since we won't have to pick an opcode for flags. Instead
      // we can rely on the DAG to CSE everything and decide at isel.
      PCMPISTR,
      PCMPESTR,

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,

      // Conversions between float and half-float.
      CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,

      // Masked version of above.
      // SRC, RND, PASSTHRU, MASK
      MCVTPS2PH,

      // Galois Field Arithmetic Instructions.
      GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,

      // LWP insert record.
      LWPINS,

      // User-level wait.
      UMWAIT, TPAUSE,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,
      LCMPXCHG8_SAVE_EBX_DAG,
      LCMPXCHG16_SAVE_RBX_DAG,

      /// LOCK-prefixed arithmetic read-modify-write instructions.
      /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
      LADD, LSUB, LOR, LXOR, LAND,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // Store FP control word into i16 memory.
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source. This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff.
      /// It has two inputs (token chain and address) and two outputs (int
      /// value and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result. This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain).
      /// FILD_FLAG also produces a flag.
      FILD,
      FILD_FLAG,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, a pointer to load from, and a ValueType node indicating
      /// the type to load to.
      FLD,

      /// This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes
      /// a chain operand, a value to store, an address, and a ValueType to
      /// store it as.
      FST,

      /// This instruction grabs the address of the next argument
      /// from a va_list (reads and modifies the va_list in memory).
      VAARG_64,

      // Vector truncating store with unsigned/signed saturation.
      VTRUNCSTOREUS, VTRUNCSTORES,
      // Vector truncating masked store with unsigned/signed saturation.
      VMTRUNCSTOREUS, VMTRUNCSTORES,

      // X86 specific gather and scatter.
      MGATHER, MSCATTER,

      // WARNING: Do not add anything at the end unless you want the node to
      // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
      // opcodes will be treated as target memory ops!
    };
  } // end namespace X86ISD

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant
    /// +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset fits into the displacement field of
    /// the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, the destination alignment can satisfy
    /// any constraint. Similarly, if SrcAlign is zero there is no need to
    /// check it against an alignment requirement, probably because the
    /// source does not need to be loaded. If 'IsMemset' is true, that means
    /// it's expanding a memset. If 'ZeroMemset' is true, that means it's a
    /// memset of zero. 'MemcpyStrSrc' indicates whether the memcpy source is
    /// constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                            MachineFunction &MF) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2, f64 load / store are done with fldl / fstpl,
    /// which also do type conversion. Note the specified type doesn't have
    /// to be legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Places new result values for the node in Results (their number
    /// and types must exactly match those of the original return values of
    /// the node), or leaves Results empty, which indicates that the node is
    /// not to be custom lowered after all.
    void LowerOperationWrapper(SDNode *N,
                               SmallVectorImpl<SDValue> &Results,
                               SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    // Return true if it is profitable to combine a BUILD_VECTOR with a
    // stride-pattern to a shuffle and a truncate.
    // Example of such a combine:
    // v4i32 build_vector((extract_elt V, 1),
    //                    (extract_elt V, 3),
    //                    (extract_elt V, 5),
    //                    (extract_elt V, 7))
    //  -->
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
    // v4i64)
    bool isDesirableToCombineBuildVectorToShuffleTruncate(
        ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for
    /// the given node type. e.g. On x86 i16 is legal, but undesirable since
    /// i16 instruction encodings are longer and some i16 instructions are
    /// slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On
    /// x86 i16 is legal, but undesirable since i16 instruction encodings are
    /// longer and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool mergeStoresAfterLegalization() const override { return true; }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // add one store instruction. There is potentially a more significant
      // benefit because it avoids the float->int domain switch for the input
      // value, so it is likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and add one store instruction (costing one more store
      // buffer). Since the benefit is less clear, we leave such pairs out
      // until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool preferShiftsToClearExtremeBits(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
      // XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Allow multiple load pairs per block for smaller and faster code.
    unsigned getMemcmpEqZeroLoadsPerBlock() const override {
      return 2;
    }

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight
    /// value. The operand object must already have been set up with the
    /// operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid,
    /// don't add anything to Ops. If hasMemory is true it means one of the
    /// asm constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for
    /// the register. This should only be used for C_Register constraints.
    /// On error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified
    /// type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;
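
    // Illustrative note (a sketch of the x86 rules, not normative): an
    // addressing mode computes BaseGV + BaseOffs + BaseReg + Scale*IndexReg.
    // Scales of 1, 2, 4 and 8 encode directly in a SIB byte; 3, 5 and 9 are
    // accepted only when no base register is present, since they can then be
    // formed as IndexReg + IndexReg*{2,4,8}. Other scales are rejected.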

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to
    /// materialize the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value
    /// in register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined
    /// in unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or
    /// any extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and
    /// fadd instructions. fmuladd intrinsics will be expanded to FMAs when
    /// this method returns true; otherwise fmuladd is expanded to fmul +
    /// fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need
    /// to map to a MemIntrinsicNode (touches memory). If this is the case,
    /// it returns true and stores the intrinsic information into the
    /// IntrinsicInfo that was passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if
    /// a target supports the VECTOR_SHUFFLE node, all mask values are
    /// assumed to be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
    /// with a constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller
    /// type in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is
      // more expensive than a straight movsd. On the other hand, it's
      // important to shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
    }

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    bool decomposeMulByConstant(EVT VT, SDValue C) const override;

    bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                  bool IsSigned) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power
    /// than the vector equivalent, so this always makes sense if the scalar
    /// op is supported.
    bool shouldScalarizeBinop(SDValue) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a
      // reduction in instructions even after we add a vector constant load.
      return NumElem > 2;
    }

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    unsigned getRegisterByName(const char *RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Value *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;

    /// If the target stores the SafeStack pointer at a fixed offset in some
    /// non-standard address space, this returns its location; the address
    /// space and offset are populated as appropriate.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

    MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                      CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;
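
    // A sketch of the input this hook sees (assuming the generic
    // interleaved-access pass): a Factor == 2 interleaved load arrives as a
    // wide load plus one strided shufflevector per member, e.g.
    //   %wide = load <8 x i32>, <8 x i32>* %ptr
    //   %even = shufflevector <8 x i32> %wide, undef, <0, 2, 4, 6> ; Index 0
    //   %odd  = shufflevector <8 x i32> %wide, undef, <1, 3, 5, 7> ; Index 1
    // Shuffles and Indices describe those shuffles; the hook may replace
    // them with target specific shuffle sequences and return true.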

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                   SDValue Addr,
                                   SelectionDAG &DAG) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
        bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace() const;

    std::pair<SDValue, SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                                bool isSigned,
                                                bool isReplace) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                         bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Emit flags for the given setcc condition and operands. Also returns
    /// the corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
        : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
             N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
        : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }
    const SDValue &getMask() const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
             N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
        : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
        : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
        : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT,
                               MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };
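
  // The classof implementations above are what allow LLVM's cast machinery
  // to match these nodes. A minimal usage sketch (hypothetical combine code,
  // not part of this interface):
  //   if (auto *TS = dyn_cast<TruncSStoreSDNode>(N)) {
  //     SDValue StoredVal = TS->getValue();  // operand #1
  //     SDValue Ptr = TS->getBasePtr();      // operand #2
  //   }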

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                             MachineMemOperand *MMO)
        : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT,
                               MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode
  // for convenience.
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
        : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                       MMO) {}

    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
                                       MMO) {}

    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }
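
  // A worked example of the mask generator above (illustrative only): for
  // VT == MVT::v4i32 the loop yields
  //   Lo  && !Unary -> { 0, 4, 1, 5 }  (interleave low halves, a la UNPCKL)
  //   !Lo && !Unary -> { 2, 6, 3, 7 }  (interleave high halves, a la UNPCKH)
  //   Lo  && Unary  -> { 0, 0, 1, 1 }  (a vector unpacked with itself)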

  /// Helper function to scale a shuffle or target shuffle mask, replacing
  /// each mask index with the scaled sequential indices for an equivalent
  /// narrowed mask, e.g. scaling {0, 2} by 2 gives {0, 1, 4, 5}, while
  /// sentinel values (negative indices) are simply repeated. This is the
  /// reverse process to canWidenShuffleElements, but can always succeed.
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    int NumElts = Mask.size();
    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H