//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information.  In particular,
      /// the operands of this node are:
      ///
      ///     #0 - The incoming token chain
      ///     #1 - The callee
      ///     #2 - The number of arg bytes the caller pushes on the stack.
      ///     #3 - The number of arg bytes the callee pops off the stack.
      ///     #4 - The value to pass in AL/AX/EAX (optional)
      ///     #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///     #0 - The outgoing token chain
      ///     #1 - The first register result value (optional)
      ///     #2 - The second register result value (optional)
      ///
      CALL,
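      //
      // Illustrative only (a hypothetical DAG shape, not taken from this
      // file): a direct call to "f" that returns one value in a register
      // might look roughly like
      //   ch', res = X86ISD::CALL ch, TargetGlobalAddress:f,
      //                           TargetConstant:0,  // caller-pushed arg bytes
      //                           TargetConstant:0   // callee-popped arg bytes
      // where ch' is the outgoing token chain and res is the first register
      // result described above.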

      /// Same as call except it adds the NoTrack prefix.
      NT_CALL,

      /// This operation implements the lowering for readcyclecounter.
      RDTSC_DAG,

      /// X86 Read Time-Stamp Counter and Processor ID.
      RDTSCP_DAG,

      /// X86 Read Performance Monitoring Counters.
      RDPMC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select
      SELECT, SELECTS,

      // Same as SETCC except it's materialized with an SBB and the value is all
      // ones or all zeros.
      SETCC_CARRY,  // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
      FSETCC,

      /// X86 FP SETCC, similar to above, but with output as an i1 mask and
      /// with optional rounding mode.
      FSETCCM, FSETCCM_RND,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is the
      /// flag operand produced by a CMP or TEST instruction. It also writes a
      /// flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch if condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
      /// operand 1 is the target address.
      NT_BRIND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Return from interrupt. Operand 0 is the number of bytes to pop.
      IRET,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool, TargetJumpTable,
      /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
      /// MCSymbol and TargetBlockAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of a MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of a MMX vector
      /// and zero out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRW.
      PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Compute Sum of Absolute Differences.
      PSADBW,
      /// Compute Double Block Packed Sum-Absolute-Differences
      DBPSADBW,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Dynamic (non-constant condition) vector blend where only the sign bits
      /// of the condition elements are used. This is used to enforce that the
      /// condition mask is not valid for generic VSELECT optimizations.
      SHRUNKBLEND,

      /// Combined add and sub on an FP vector.
      ADDSUB,

      //  FP vector ops with rounding mode.
      FADD_RND, FADDS_RND,
      FSUB_RND, FSUBS_RND,
      FMUL_RND, FMULS_RND,
      FDIV_RND, FDIVS_RND,
      FMAX_RND, FMAXS_RND,
      FMIN_RND, FMINS_RND,
      FSQRT_RND, FSQRTS_RND,

      // FP vector get exponent.
      FGETEXP_RND, FGETEXPS_RND,
      // Extract Normalized Mantissas.
      VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
      // FP Scale.
      SCALEF,
      SCALEFS,

      // Integer add/sub with unsigned saturation.
      ADDUS,
      SUBUS,

      // Integer add/sub with signed saturation.
      ADDS,
      SUBS,

      // Unsigned Integer average.
      AVG,

      /// Integer horizontal add/sub.
      HADD,
      HSUB,

      /// Floating point horizontal add/sub.
      FHADD,
      FHSUB,

      // Detect Conflicts Within a Vector
      CONFLICT,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Scalar intrinsic floating point max and min.
      FMAXS, FMINS,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // AVX-512 reciprocal approximations with a little more precision.
      RSQRT14, RSQRT14S, RCP14, RCP14S,

      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage.  When calling to an OS provided
      // thunk at the address from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      // SjLj exception handling dispatch.
      EH_SJLJ_SETUP_DISPATCH,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer zero-extend.
      VZEXT,
      // Vector integer signed-extend.
      VSEXT,

      // Vector integer truncate.
      VTRUNC,
      // Vector integer truncate with unsigned/signed saturation.
      VTRUNCUS, VTRUNCS,

      // Vector FP extend.
      VFPEXT, VFPEXT_RND, VFPEXTS_RND,

      // Vector FP round.
      VFPROUND, VFPROUND_RND, VFPROUNDS_RND,

      // 128-bit vector logical left / right shift
      VSHLDQ, VSRLDQ,

      // Vector shift elements
      VSHL, VSRL, VSRA,

      // Vector variable shift right arithmetic.
      // Unlike ISD::SRA, if the shift count is greater than the element size,
      // the sign bit is used to fill the destination data element.
      VSRAV,

      // Vector shift elements by immediate
      VSHLI, VSRLI, VSRAI,

      // Shifts of mask registers.
      KSHIFTL, KSHIFTR,

      // Bit rotate by immediate
      VROTLI, VROTRI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,

      // v8i16 Horizontal minimum and position.
      PHMINPOS,

      MULTISHIFT,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      // Vector comparison with rounding mode for FP values
      CMPM_RND,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      // Bit field extract.
      BEXTR,

      // LOW, HI, FLAGS = umul LHS, RHS.
      UMUL,

      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
      SMUL8, UMUL8,

      // 8-bit divrem that zero-extend the high result (AH).
      UDIVREM8_ZEXT_HREG,
      SDIVREM8_SEXT_HREG,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector sign bit extraction.
      MOVMSK,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // OR/AND test for masks.
      KORTEST,
      KTEST,

      // ADD for masks.
      KADD,

      // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      // VBMI2 Concat & Shift.
      VSHLD,
      VSHRD,
      VSHLDV,
      VSHRDV,
      // Shuffle Packed Values at 128-bit granularity.
      SHUF128,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVHLPS,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMI,
      VPERM2X128,

      // Variable Permute (VPERM).
      // Res = VPERMV MaskV, V0
      VPERMV,

      // 3-op Variable Permute (VPERMT2).
      // Res = VPERMV3 V0, MaskV, V1
      VPERMV3,

      // Bitwise ternary logic.
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values.
      VFIXUPIMM,
      VFIXUPIMMS,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
      VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
      VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
      // Also used by the legacy (V)ROUND intrinsics where we mask out the
      // scaling part of the immediate.
      VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
      // Tests the type class of packed FP values.
      VFPCLASS,
      // Tests the type class of a scalar FP value.
      VFPCLASSS,

      // Broadcast scalar to vector.
      VBROADCAST,
      // Broadcast mask to vector.
      VBROADCASTM,
      // Broadcast subvector to vector.
      SUBV_BROADCAST,

      /// SSE4A Extraction and Insertion.
      EXTRQI, INSERTQI,

      // XOP arithmetic/logical shifts.
      VPSHA, VPSHL,
      // XOP signed/unsigned integer comparisons.
      VPCOM, VPCOMU,
      // XOP packed permute bytes.
      VPPERM,
      // XOP two source permutation.
      VPERMIL2,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,
      // Vector multiply packed integers with round and scale.
      MULHRS,

      // Multiply and Add Packed Integers.
      VPMADDUBSW, VPMADDWD,

      // AVX512IFMA multiply and add.
      // NOTE: The operand order differs from the instruction; these nodes
      // compute op0 * op1 + op2.
      VPMADD52L, VPMADD52H,

      // VNNI
      VPDPBUSD,
      VPDPBUSDS,
      VPDPWSSD,
      VPDPWSSDS,

      // FMA nodes.
      // We use the target independent ISD::FMA for the non-inverted case.
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Bits shuffle
      VPSHUFBITQMB,

      // Convert signed/unsigned integer to floating-point value, with
      // rounding mode.
      SINT_TO_FP_RND, UINT_TO_FP_RND,
      SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

      // Vector float/double to signed/unsigned integer.
      CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
      // Scalar float/double to signed/unsigned integer.
      CVTS2SI_RND, CVTS2UI_RND,

      // Vector float/double to signed/unsigned integer with truncation.
      CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
      // Scalar float/double to signed/unsigned integer with truncation.
      CVTTS2SI_RND, CVTTS2UI_RND,

      // Vector signed/unsigned integer to float/double.
      CVTSI2P, CVTUI2P,

      // Save xmm argument registers to the stack, according to %al. An operator
      // is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows's _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Check if the current stacklet has enough space, and
      // falls back to heap allocation if not.
      SEG_ALLOCA,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      // SSE42 string comparisons.
      // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
      // will emit one or two instructions based on which results are used. If
      // both the flags and the index/mask results are needed, this still lets
      // us use a single instruction, since we won't have to pick an opcode for
      // the flags alone. Instead we can rely on the DAG to CSE everything and
      // decide at isel time.
      PCMPISTR,
      PCMPESTR,
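      //
      // Illustrative only: a loop that needs just the index result selects a
      // single PCMPISTRI; if the flags result is also used, the same
      // instruction still suffices because it sets EFLAGS as a side effect,
      // so no separate flags-only opcode has to be chosen here.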

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,

      // Conversions between float and half-float.
      CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,

      // Galois Field Arithmetic Instructions
      GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,

      // LWP insert record.
      LWPINS,

      // User level wait
      UMWAIT, TPAUSE,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,
      LCMPXCHG8_SAVE_EBX_DAG,
      LCMPXCHG16_SAVE_RBX_DAG,

      /// LOCK-prefixed arithmetic read-modify-write instructions.
      /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
      LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // Store FP control word into i16 memory.
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source.  This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
      /// has two inputs (token chain and address) and two outputs (int value
      /// and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result.  This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain);
      /// FILD_FLAG also produces a flag.
      FILD,
      FILD_FLAG,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, ptr to load from, and a ValueType node indicating the type
      /// to load to.
      FLD,

      /// This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and a ValueType to store it
      /// as.
      FST,

      /// This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64,

      // Vector truncating store with unsigned/signed saturation
      VTRUNCSTOREUS, VTRUNCSTORES,
      // Vector truncating masked store with unsigned/signed saturation
      VMTRUNCSTOREUS, VMTRUNCSTORES,

      // X86 specific gather and scatter
      MGATHER, MSCATTER,

      // WARNING: Do not add anything in the end unless you want the node to
      // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
      // opcodes will be thought as target memory ops!
    };
  } // end namespace X86ISD

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the
    /// displacement field of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, the destination alignment can satisfy
    /// any constraint. Similarly, if SrcAlign is zero, there is no need to
    /// check it against an alignment requirement, probably because the source
    /// does not need to be loaded. If 'IsMemset' is true, that means it's
    /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset
    /// of zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant
    /// so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                            MachineFunction &MF) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                       bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Places new result values for the node in Results (their number
    /// and types must exactly match those of the original return values of
    /// the node), or leaves Results empty, which indicates that the node is not
    /// to be custom lowered after all.
    void LowerOperationWrapper(SDNode *N,
                               SmallVectorImpl<SDValue> &Results,
                               SelectionDAG &DAG) const override;

    /// Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    // Return true if it is profitable to combine a BUILD_VECTOR with a
    // stride-pattern to a shuffle and a truncate.
    // Example of such a combine:
    // v4i32 build_vector((extract_elt V, 1),
    //                    (extract_elt V, 3),
    //                    (extract_elt V, 5),
    //                    (extract_elt V, 7))
    //  -->
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
    // v4i64)
    bool isDesirableToCombineBuildVectorToShuffleTruncate(
        ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool mergeStoresAfterLegalization() const override { return true; }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction at the
      // cost of one extra store. There is potentially a more significant
      // benefit because it avoids the float->int domain switch for the input
      // value, so it is likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we would save two bitwise
      // instructions at the cost of one extra store (consuming one more store
      // buffer entry). Since that benefit is less clear, we leave such pairs
      // alone until we have a test case proving it is a win.
      return false;
    }
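    // A sketch of the mixed float/int case described above (illustrative
    // operands and offsets only): storing the pair {int I, float F} as
    //   mov   dword ptr [p],   I
    //   movss dword ptr [p+4], F
    // avoids moving F into an integer register and shift/or-merging it into
    // an i64 just to perform a single 8-byte store.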

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool preferShiftsToClearExtremeBits(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are OK with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports. XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Allow multiple load pairs per block for smaller and faster code.
    unsigned getMemcmpEqZeroLoadsPerBlock() const override {
      return 2;
    }
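    /// For example (illustrative; the actual expansion is produced by the
    /// generic memcmp lowering), a 32-byte memcmp()==0 can be checked in one
    /// block as ((a0 ^ b0) | (a1 ^ b1)) == 0 using two 16-byte loads from
    /// each buffer instead of a chain of early-exit blocks.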

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    SDValue unwrapAddress(SDValue N) const override;

    bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
                        int64_t &Offset) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                     const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraint of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register.  This should only be used for C_Register constraints.  On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicit zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to map
    /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
    /// true and stores the intrinsic information into the IntrinsicInfo that was
    /// passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
      // expensive than a straight movsd. On the other hand, it's important to
      // shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
    }
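    /// For example, with SSE2 available an f64 add is selected to ADDSD on an
    /// XMM register, whereas without SSE2 the same operation goes through the
    /// x87 stack (FADD on ST registers). This predicate only reports where the
    /// type is computed; it does not pick instructions.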

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction
      // in instructions even after we add a vector constant load.
      return NumElem > 2;
    }
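    // Illustrative example of the heuristic above: four scalar stores of
    // constants to consecutive i32 slots can become one load of a v4i32
    // constant-pool value plus a single vector store (2 instructions instead
    // of 4), whereas with only 2 scalar stores the instruction count would
    // not improve.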

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;

    /// Intel processors have a unified instruction and data cache.
    const char * getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    unsigned getRegisterByName(const char* RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    virtual bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Value *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;


    /// Return the pointer to the location where the target stores the
    /// SafeStack pointer; it is kept at a fixed offset in some non-standard
    /// address space.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
                                   SDValue Addr, SelectionDAG &DAG)
                                   const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat& Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG& DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace(void) const;

    std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                               bool isSigned,
                                               bool isReplace) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
1261     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1262     void insertCopiesSplitCSR(
1263       MachineBasicBlock *Entry,
1264       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1265 
1266     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1267 
1268     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1269 
1270     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1271                             ISD::NodeType ExtendKind) const override;
1272 
1273     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1274                         bool isVarArg,
1275                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1276                         LLVMContext &Context) const override;
1277 
1278     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1279 
1280     TargetLoweringBase::AtomicExpansionKind
1281     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1282     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1283     TargetLoweringBase::AtomicExpansionKind
1284     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1285 
1286     LoadInst *
1287     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1288 
1289     bool needsCmpXchgNb(Type *MemType) const;
1290 
1291     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1292                                 MachineBasicBlock *DispatchBB, int FI) const;
1293 
1294     // Utility function to emit the low-level va_arg code for X86-64.
1295     MachineBasicBlock *
1296     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1297                                   MachineBasicBlock *MBB) const;
1298 
    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
                     SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
        N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

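    // Operand layout (operand 0 is the chain), matching MaskedStoreSDNode:
    //   1 - base pointer, 2 - mask, 3 - value to store.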
    const SDValue &getBasePtr() const { return getOperand(1); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getValue()   const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
        N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                             MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
        : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

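    // Operand layout (operand 0 is the chain), matching
    // MaskedGatherScatterSDNode: 1 - value (gather passthru / scatter source),
    // 2 - mask, 3 - base pointer, 4 - index, 5 - scale.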
    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getValue()   const { return getOperand(1); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                       MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
                                       MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
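  /// As a worked illustration of the loop below: for MVT::v4i32, the
  /// non-unary "Lo" mask is {0, 4, 1, 5} and the non-unary "Hi" mask is
  /// {2, 6, 3, 7}; in the unary case both input indices refer to operand 0,
  /// giving {0, 0, 1, 1} and {2, 2, 3, 3} respectively.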
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }

  /// Helper function to scale a shuffle or target shuffle mask, replacing each
  /// mask index with the scaled sequential indices for an equivalent narrowed
  /// mask. This is the reverse process to canWidenShuffleElements, but can
  /// always succeed.
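  /// For example, scaling the mask {0, 2, -1, -1} by a factor of 2 yields
  /// {0, 1, 4, 5, -1, -1, -1, -1}: each defined element expands to Scale
  /// consecutive narrowed indices, while sentinel (negative) elements are
  /// simply repeated.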
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    int NumElts = Mask.size();
    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H