1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/Target/TargetOptions.h"
22 
23 namespace llvm {
24   class X86Subtarget;
25   class X86TargetMachine;
26 
27   namespace X86ISD {
28     // X86 Specific DAG Nodes
29     enum NodeType : unsigned {
30       // Start the numbering where the builtin ops leave off.
31       FIRST_NUMBER = ISD::BUILTIN_OP_END,
32 
33       /// Bit scan forward.
34       BSF,
35       /// Bit scan reverse.
36       BSR,
37 
38       /// Double shift instructions. These correspond to
39       /// X86::SHLDxx and X86::SHRDxx instructions.
40       SHLD,
41       SHRD,
42 
43       /// Bitwise logical AND of floating point values. This corresponds
44       /// to X86::ANDPS or X86::ANDPD.
45       FAND,
46 
47       /// Bitwise logical OR of floating point values. This corresponds
48       /// to X86::ORPS or X86::ORPD.
49       FOR,
50 
51       /// Bitwise logical XOR of floating point values. This corresponds
52       /// to X86::XORPS or X86::XORPD.
53       FXOR,
54 
55       ///  Bitwise logical ANDNOT of floating point values. This
56       /// corresponds to X86::ANDNPS or X86::ANDNPD.
57       FANDN,
58 
      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information.  In particular, the
      /// operands of these nodes are:
62       ///
63       ///     #0 - The incoming token chain
64       ///     #1 - The callee
65       ///     #2 - The number of arg bytes the caller pushes on the stack.
66       ///     #3 - The number of arg bytes the callee pops off the stack.
67       ///     #4 - The value to pass in AL/AX/EAX (optional)
68       ///     #5 - The value to pass in DL/DX/EDX (optional)
69       ///
70       /// The result values of these nodes are:
71       ///
72       ///     #0 - The outgoing token chain
73       ///     #1 - The first register result value (optional)
74       ///     #2 - The second register result value (optional)
75       ///
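      /// A rough, illustrative sketch of how such a node is assembled (the
      /// variable names here are hypothetical; see X86TargetLowering::LowerCall
      /// for the actual code):
      ///
      ///     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
      ///     SmallVector<SDValue, 8> Ops;
      ///     Ops.push_back(Chain);   // #0: incoming token chain
      ///     Ops.push_back(Callee);  // #1: callee
      ///     // ... remaining operands as described above ...
      ///     Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);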
76       CALL,
77 
78       /// Same as call except it adds the NoTrack prefix.
79       NT_CALL,
80 
81       /// This operation implements the lowering for readcyclecounter.
82       RDTSC_DAG,
83 
84       /// X86 Read Time-Stamp Counter and Processor ID.
85       RDTSCP_DAG,
86 
87       /// X86 Read Performance Monitoring Counters.
88       RDPMC_DAG,
89 
90       /// X86 compare and logical compare instructions.
91       CMP, COMI, UCOMI,
92 
93       /// X86 bit-test instructions.
94       BT,
95 
96       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
97       /// operand, usually produced by a CMP instruction.
98       SETCC,
99 
100       /// X86 Select
101       SELECTS,
102 
      // Same as SETCC except it's materialized with an SBB and the value is
      // all ones or all zeros.
      SETCC_CARRY,  // R = carry_bit ? ~0 : 0
106 
107       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
108       /// Operands are two FP values to compare; result is a mask of
109       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
110       FSETCC,
111 
112       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
113       /// with optional rounding mode.
114       FSETCCM, FSETCCM_RND,
115 
116       /// X86 conditional moves. Operand 0 and operand 1 are the two values
117       /// to select from. Operand 2 is the condition code, and operand 3 is the
118       /// flag operand produced by a CMP or TEST instruction. It also writes a
119       /// flag result.
120       CMOV,
121 
122       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
123       /// is the block to branch if condition is true, operand 2 is the
124       /// condition code, and operand 3 is the flag operand produced by a CMP
125       /// or TEST instruction.
126       BRCOND,
127 
128       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
129       /// operand 1 is the target address.
130       NT_BRIND,
131 
132       /// Return with a flag operand. Operand 0 is the chain operand, operand
133       /// 1 is the number of bytes of stack to pop.
134       RET_FLAG,
135 
136       /// Return from interrupt. Operand 0 is the number of bytes to pop.
137       IRET,
138 
139       /// Repeat fill, corresponds to X86::REP_STOSx.
140       REP_STOS,
141 
142       /// Repeat move, corresponds to X86::REP_MOVSx.
143       REP_MOVS,
144 
145       /// On Darwin, this node represents the result of the popl
146       /// at function entry, used for PIC code.
147       GlobalBaseReg,
148 
149       /// A wrapper node for TargetConstantPool, TargetJumpTable,
150       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
151       /// MCSymbol and TargetBlockAddress.
152       Wrapper,
153 
154       /// Special wrapper used under X86-64 PIC mode for RIP
155       /// relative displacements.
156       WrapperRIP,
157 
158       /// Copies a 64-bit value from the low word of an XMM vector
159       /// to an MMX vector.
160       MOVDQ2Q,
161 
      /// Copies a 32-bit value from the low word of an MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of an MMX vector
      /// and zeros out the high word.
      MMX_MOVW2D,
169 
170       /// Extract an 8-bit value from a vector and zero extend it to
171       /// i32, corresponds to X86::PEXTRB.
172       PEXTRB,
173 
174       /// Extract a 16-bit value from a vector and zero extend it to
175       /// i32, corresponds to X86::PEXTRW.
176       PEXTRW,
177 
      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,
181 
      /// Insert the lower 8 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRW.
      PINSRW,
189 
190       /// Shuffle 16 8-bit values within a vector.
191       PSHUFB,
192 
193       /// Compute Sum of Absolute Differences.
194       PSADBW,
195       /// Compute Double Block Packed Sum-Absolute-Differences
196       DBPSADBW,
197 
198       /// Bitwise Logical AND NOT of Packed FP values.
199       ANDNP,
200 
201       /// Blend where the selector is an immediate.
202       BLENDI,
203 
204       /// Dynamic (non-constant condition) vector blend where only the sign bits
205       /// of the condition elements are used. This is used to enforce that the
206       /// condition mask is not valid for generic VSELECT optimizations. This
207       /// can also be used to implement the intrinsics.
208       BLENDV,
209 
210       /// Combined add and sub on an FP vector.
211       ADDSUB,
212 
213       //  FP vector ops with rounding mode.
214       FADD_RND, FADDS_RND,
215       FSUB_RND, FSUBS_RND,
216       FMUL_RND, FMULS_RND,
217       FDIV_RND, FDIVS_RND,
218       FMAX_RND, FMAXS_RND,
219       FMIN_RND, FMINS_RND,
220       FSQRT_RND, FSQRTS_RND,
221 
222       // FP vector get exponent.
223       FGETEXP_RND, FGETEXPS_RND,
224       // Extract Normalized Mantissas.
225       VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
226       // FP Scale.
227       SCALEF,
228       SCALEFS,
229 
230       // Unsigned Integer average.
231       AVG,
232 
233       /// Integer horizontal add/sub.
234       HADD,
235       HSUB,
236 
237       /// Floating point horizontal add/sub.
238       FHADD,
239       FHSUB,
240 
241       // Detect Conflicts Within a Vector
242       CONFLICT,
243 
244       /// Floating point max and min.
245       FMAX, FMIN,
246 
247       /// Commutative FMIN and FMAX.
248       FMAXC, FMINC,
249 
250       /// Scalar intrinsic floating point max and min.
251       FMAXS, FMINS,
252 
253       /// Floating point reciprocal-sqrt and reciprocal approximation.
254       /// Note that these typically require refinement
255       /// in order to obtain suitable precision.
256       FRSQRT, FRCP,
257 
258       // AVX-512 reciprocal approximations with a little more precision.
259       RSQRT14, RSQRT14S, RCP14, RCP14S,
260 
261       // Thread Local Storage.
262       TLSADDR,
263 
264       // Thread Local Storage. A call to get the start address
265       // of the TLS block for the current module.
266       TLSBASEADDR,
267 
      // Thread Local Storage. A call to an OS-provided
      // thunk at the address from an earlier relocation.
270       TLSCALL,
271 
272       // Exception Handling helpers.
273       EH_RETURN,
274 
275       // SjLj exception handling setjmp.
276       EH_SJLJ_SETJMP,
277 
278       // SjLj exception handling longjmp.
279       EH_SJLJ_LONGJMP,
280 
281       // SjLj exception handling dispatch.
282       EH_SJLJ_SETUP_DISPATCH,
283 
284       /// Tail call return. See X86TargetLowering::LowerCall for
285       /// the list of operands.
286       TC_RETURN,
287 
288       // Vector move to low scalar and zero higher vector elements.
289       VZEXT_MOVL,
290 
291       // Vector integer truncate.
292       VTRUNC,
293       // Vector integer truncate with unsigned/signed saturation.
294       VTRUNCUS, VTRUNCS,
295 
296       // Masked version of the above. Used when less than a 128-bit result is
297       // produced since the mask only applies to the lower elements and can't
298       // be represented by a select.
299       // SRC, PASSTHRU, MASK
300       VMTRUNC, VMTRUNCUS, VMTRUNCS,
301 
302       // Vector FP extend.
303       VFPEXT, VFPEXT_RND, VFPEXTS_RND,
304 
305       // Vector FP round.
306       VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
307 
308       // Masked version of above. Used for v2f64->v4f32.
309       // SRC, PASSTHRU, MASK
310       VMFPROUND,
311 
312       // 128-bit vector logical left / right shift
313       VSHLDQ, VSRLDQ,
314 
315       // Vector shift elements
316       VSHL, VSRL, VSRA,
317 
318       // Vector variable shift
319       VSHLV, VSRLV, VSRAV,
320 
321       // Vector shift elements by immediate
322       VSHLI, VSRLI, VSRAI,
323 
324       // Shifts of mask registers.
325       KSHIFTL, KSHIFTR,
326 
327       // Bit rotate by immediate
328       VROTLI, VROTRI,
329 
330       // Vector packed double/float comparison.
331       CMPP,
332 
333       // Vector integer comparisons.
334       PCMPEQ, PCMPGT,
335 
336       // v8i16 Horizontal minimum and position.
337       PHMINPOS,
338 
339       MULTISHIFT,
340 
341       /// Vector comparison generating mask bits for fp and
342       /// integer signed and unsigned data types.
343       CMPM,
344       // Vector comparison with rounding mode for FP values
345       CMPM_RND,
346 
347       // Arithmetic operations with FLAGS results.
348       ADD, SUB, ADC, SBB, SMUL, UMUL,
349       OR, XOR, AND,
350 
351       // Bit field extract.
352       BEXTR,
353 
354       // Zero High Bits Starting with Specified Bit Position.
355       BZHI,
356 
357       // X86-specific multiply by immediate.
358       MUL_IMM,
359 
360       // Vector sign bit extraction.
361       MOVMSK,
362 
363       // Vector bitwise comparisons.
364       PTEST,
365 
366       // Vector packed fp sign bitwise comparisons.
367       TESTP,
368 
369       // OR/AND test for masks.
370       KORTEST,
371       KTEST,
372 
373       // ADD for masks.
374       KADD,
375 
376       // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
378       PACKSS,
379       PACKUS,
380       // Intra-lane alignr.
381       PALIGNR,
382       // AVX512 inter-lane alignr.
383       VALIGN,
384       PSHUFD,
385       PSHUFHW,
386       PSHUFLW,
387       SHUFP,
388       // VBMI2 Concat & Shift.
389       VSHLD,
390       VSHRD,
391       VSHLDV,
392       VSHRDV,
      // Shuffle Packed Values at 128-bit granularity.
394       SHUF128,
395       MOVDDUP,
396       MOVSHDUP,
397       MOVSLDUP,
398       MOVLHPS,
399       MOVHLPS,
400       MOVSD,
401       MOVSS,
402       UNPCKL,
403       UNPCKH,
404       VPERMILPV,
405       VPERMILPI,
406       VPERMI,
407       VPERM2X128,
408 
409       // Variable Permute (VPERM).
410       // Res = VPERMV MaskV, V0
411       VPERMV,
412 
413       // 3-op Variable Permute (VPERMT2).
414       // Res = VPERMV3 V0, MaskV, V1
415       VPERMV3,
416 
417       // Bitwise ternary logic.
418       VPTERNLOG,
419       // Fix Up Special Packed Float32/64 values.
420       VFIXUPIMM,
421       VFIXUPIMMS,
422       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
423       VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
425       VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
426       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
427       // Also used by the legacy (V)ROUND intrinsics where we mask out the
428       // scaling part of the immediate.
429       VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
      // Tests the types of packed FP values.
      VFPCLASS,
      // Tests the types of scalar FP values.
      VFPCLASSS,
434 
435       // Broadcast scalar to vector.
436       VBROADCAST,
437       // Broadcast mask to vector.
438       VBROADCASTM,
439       // Broadcast subvector to vector.
440       SUBV_BROADCAST,
441 
442       /// SSE4A Extraction and Insertion.
443       EXTRQI, INSERTQI,
444 
445       // XOP arithmetic/logical shifts.
446       VPSHA, VPSHL,
447       // XOP signed/unsigned integer comparisons.
448       VPCOM, VPCOMU,
449       // XOP packed permute bytes.
450       VPPERM,
451       // XOP two source permutation.
452       VPERMIL2,
453 
454       // Vector multiply packed unsigned doubleword integers.
455       PMULUDQ,
456       // Vector multiply packed signed doubleword integers.
457       PMULDQ,
      // Vector multiply packed signed words with round and scale (PMULHRSW).
      MULHRS,
460 
461       // Multiply and Add Packed Integers.
462       VPMADDUBSW, VPMADDWD,
463 
464       // AVX512IFMA multiply and add.
465       // NOTE: These are different than the instruction and perform
466       // op0 x op1 + op2.
467       VPMADD52L, VPMADD52H,
468 
469       // VNNI
470       VPDPBUSD,
471       VPDPBUSDS,
472       VPDPWSSD,
473       VPDPWSSDS,
474 
475       // FMA nodes.
476       // We use the target independent ISD::FMA for the non-inverted case.
477       FNMADD,
478       FMSUB,
479       FNMSUB,
480       FMADDSUB,
481       FMSUBADD,
482 
483       // FMA with rounding mode.
484       FMADD_RND,
485       FNMADD_RND,
486       FMSUB_RND,
487       FNMSUB_RND,
488       FMADDSUB_RND,
489       FMSUBADD_RND,
490 
491       // Compress and expand.
492       COMPRESS,
493       EXPAND,
494 
495       // Bits shuffle
496       VPSHUFBITQMB,
497 
      // Convert Signed/Unsigned Integer to Floating-Point Value
      // with rounding mode.
499       SINT_TO_FP_RND, UINT_TO_FP_RND,
500       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
501 
502       // Vector float/double to signed/unsigned integer.
503       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
504       // Scalar float/double to signed/unsigned integer.
505       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
506 
507       // Vector float/double to signed/unsigned integer with truncation.
508       CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
509       // Scalar float/double to signed/unsigned integer with truncation.
510       CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
511 
512       // Vector signed/unsigned integer to float/double.
513       CVTSI2P, CVTUI2P,
514 
515       // Masked versions of above. Used for v2f64->v4f32.
516       // SRC, PASSTHRU, MASK
517       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
518 
519       // Save xmm argument registers to the stack, according to %al. An operator
520       // is needed so that this can be expanded with control flow.
521       VASTART_SAVE_XMM_REGS,
522 
523       // Windows's _chkstk call to do stack probing.
524       WIN_ALLOCA,
525 
      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks whether the current stacklet has enough
      // space, and falls back to heap allocation if not.
529       SEG_ALLOCA,
530 
531       // Memory barriers.
532       MEMBARRIER,
533       MFENCE,
534 
535       // Store FP status word into i16 register.
536       FNSTSW16r,
537 
538       // Store contents of %ah into %eflags.
539       SAHF,
540 
541       // Get a random integer and indicate whether it is valid in CF.
542       RDRAND,
543 
544       // Get a NIST SP800-90B & C compliant random integer and
545       // indicate whether it is valid in CF.
546       RDSEED,
547 
      // SSE42 string comparisons.
      // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
      // will emit one or two instructions based on which results are used. If
      // both flags and index/mask are used, this allows us to use a single
      // instruction since we won't have to pick an opcode for flags. Instead
      // we can rely on the DAG to CSE everything and decide at isel.
554       PCMPISTR,
555       PCMPESTR,
556 
557       // Test if in transactional execution.
558       XTEST,
559 
560       // ERI instructions.
561       RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
562 
563       // Conversions between float and half-float.
564       CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
565 
566       // Masked version of above.
567       // SRC, RND, PASSTHRU, MASK
568       MCVTPS2PH,
569 
570       // Galois Field Arithmetic Instructions
571       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
572 
573       // LWP insert record.
574       LWPINS,
575 
576       // User level wait
577       UMWAIT, TPAUSE,
578 
579       // Compare and swap.
580       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
581       LCMPXCHG8_DAG,
582       LCMPXCHG16_DAG,
583       LCMPXCHG8_SAVE_EBX_DAG,
584       LCMPXCHG16_SAVE_RBX_DAG,
585 
586       /// LOCK-prefixed arithmetic read-modify-write instructions.
587       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
588       LADD, LSUB, LOR, LXOR, LAND,
589 
590       // Load, scalar_to_vector, and zero extend.
591       VZEXT_LOAD,
592 
      // Store FP control word into i16 memory.
594       FNSTCW16m,
595 
      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source.  This corresponds
      /// to the X86::FIST*m instructions and the necessary rounding mode
      /// changes. It has two inputs (token chain and address) and two outputs
      /// (int value and token chain).
601       FP_TO_INT16_IN_MEM,
602       FP_TO_INT32_IN_MEM,
603       FP_TO_INT64_IN_MEM,
604 
      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result.  This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain).
      /// FILD_FLAG also produces a flag.
610       FILD,
611       FILD_FLAG,
612 
613       /// This instruction implements an extending load to FP stack slots.
614       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
615       /// operand, ptr to load from, and a ValueType node indicating the type
616       /// to load to.
617       FLD,
618 
619       /// This instruction implements a truncating store to FP stack
620       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
621       /// chain operand, value to store, address, and a ValueType to store it
622       /// as.
623       FST,
624 
625       /// This instruction grabs the address of the next argument
626       /// from a va_list. (reads and modifies the va_list in memory)
627       VAARG_64,
628 
629       // Vector truncating store with unsigned/signed saturation
630       VTRUNCSTOREUS, VTRUNCSTORES,
631       // Vector truncating masked store with unsigned/signed saturation
632       VMTRUNCSTOREUS, VMTRUNCSTORES,
633 
634       // X86 specific gather and scatter
635       MGATHER, MSCATTER,
636 
      // WARNING: Do not add anything at the end unless you want the node to
      // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
      // opcodes will be treated as target memory ops!
640     };
641   } // end namespace X86ISD
642 
643   /// Define some predicates that are used for node matching.
644   namespace X86 {
645     /// Returns true if Elt is a constant zero or floating point constant +0.0.
646     bool isZeroNode(SDValue Elt);
647 
    /// Returns true if the given offset can
    /// fit into the displacement field of the instruction.
650     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
651                                       bool hasSymbolicDisplacement = true);
652 
653     /// Determines whether the callee is required to pop its
654     /// own arguments. Callee pop is necessary to support tail calls.
655     bool isCalleePop(CallingConv::ID CallingConv,
656                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
657 
658   } // end namespace X86
659 
660   //===--------------------------------------------------------------------===//
661   //  X86 Implementation of the TargetLowering interface
662   class X86TargetLowering final : public TargetLowering {
663   public:
664     explicit X86TargetLowering(const X86TargetMachine &TM,
665                                const X86Subtarget &STI);
666 
667     unsigned getJumpTableEncoding() const override;
668     bool useSoftFloat() const override;
669 
670     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
671                                ArgListTy &Args) const override;
672 
    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }
676 
677     const MCExpr *
678     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
679                               const MachineBasicBlock *MBB, unsigned uid,
680                               MCContext &Ctx) const override;
681 
682     /// Returns relocation base for the given PIC jumptable.
683     SDValue getPICJumpTableRelocBase(SDValue Table,
684                                      SelectionDAG &DAG) const override;
685     const MCExpr *
686     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
687                                  unsigned JTI, MCContext &Ctx) const override;
688 
    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
693     unsigned getByValTypeAlignment(Type *Ty,
694                                    const DataLayout &DL) const override;
695 
    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, the destination alignment can satisfy
    /// any constraint. Similarly, if SrcAlign is zero it means there isn't a
    /// need to check it against the alignment requirement, probably because
    /// the source does not need to be loaded. If 'IsMemset' is true, that
    /// means it's expanding a memset. If 'ZeroMemset' is true, that means it's
    /// a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy source is
    /// constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
707     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
708                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
709                             MachineFunction &MF) const override;
710 
711     /// Returns true if it's safe to use load / store of the
712     /// specified type to expand memcpy / memset inline. This is mostly true
713     /// for all types except for some special cases. For example, on X86
714     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
715     /// also does type conversion. Note the specified type doesn't have to be
716     /// legal as the hook is used before type legalization.
717     bool isSafeMemOpType(MVT VT) const override;
718 
719     /// Returns true if the target allows unaligned memory accesses of the
720     /// specified type. Returns whether it is "fast" in the last argument.
721     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
722                                        bool *Fast) const override;
723 
724     /// Provide custom lowering hooks for some operations.
725     ///
726     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
727 
728     /// Places new result values for the node in Results (their number
729     /// and types must exactly match those of the original return values of
730     /// the node), or leaves Results empty, which indicates that the node is not
731     /// to be custom lowered after all.
732     void LowerOperationWrapper(SDNode *N,
733                                SmallVectorImpl<SDValue> &Results,
734                                SelectionDAG &DAG) const override;
735 
    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    ///
739     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
740                             SelectionDAG &DAG) const override;
741 
742     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
743 
    // Return true if it is profitable to combine a BUILD_VECTOR with a
    // stride-pattern to a shuffle and a truncate.
    // Example of such a combine:
    // v4i32 build_vector((extract_elt V, 1),
    //                    (extract_elt V, 3),
    //                    (extract_elt V, 5),
    //                    (extract_elt V, 7))
    //  -->
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
754     bool isDesirableToCombineBuildVectorToShuffleTruncate(
755         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
756 
757     /// Return true if the target has native support for
758     /// the specified value type and it is 'desirable' to use the type for the
759     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
760     /// instruction encodings are longer and some i16 instructions are slow.
761     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
762 
763     /// Return true if the target has native support for the
764     /// specified value type and it is 'desirable' to use the type. e.g. On x86
765     /// i16 is legal, but undesirable since i16 instruction encodings are longer
766     /// and some i16 instructions are slow.
767     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
768 
769     MachineBasicBlock *
770     EmitInstrWithCustomInserter(MachineInstr &MI,
771                                 MachineBasicBlock *MBB) const override;
772 
773     /// This method returns the name of a target specific DAG node.
774     const char *getTargetNodeName(unsigned Opcode) const override;
775 
    bool mergeStoresAfterLegalization() const override { return true; }
777 
778     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
779                           const SelectionDAG &DAG) const override;
780 
781     bool isCheapToSpeculateCttz() const override;
782 
783     bool isCheapToSpeculateCtlz() const override;
784 
785     bool isCtlzFast() const override;
786 
    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }
790 
    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // increase one store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value, so it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair contains only int values, we will save two bitwise
      // instructions and add one store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave such pairs
      // alone until we have a test case that proves the merge is a win.
      return false;
    }
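    // For illustration only (hypothetical source, not code from this file):
    // when lowering
    //   struct Pair { float F; int I; };
    //   void set(Pair *P, float F, int I) { P->F = F; P->I = I; }
    // this hook keeps the two 32-bit stores rather than bitcasting F to an
    // integer, merging F and I into a single i64, and emitting one 64-bit
    // store.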
806 
807     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
808 
809     bool hasAndNotCompare(SDValue Y) const override;
810 
811     bool hasAndNot(SDValue Y) const override;
812 
813     bool preferShiftsToClearExtremeBits(SDValue Y) const override;
814 
    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are OK with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports. XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }
832 
833     bool shouldSplatInsEltVarIndex(EVT VT) const override;
834 
    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }
838 
839     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
840     MVT hasFastEqualityCompare(unsigned NumBits) const override;
841 
842     /// Allow multiple load pairs per block for smaller and faster code.
    unsigned getMemcmpEqZeroLoadsPerBlock() const override {
      return 2;
    }
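    // For illustration (a hypothetical call site, not from this file): with
    // two load pairs per block, an equality check such as
    //   bool Equal = memcmp(A, B, 16) == 0;
    // can be expanded inline, e.g. into two 8-byte load pairs combined with
    // xor/or when 16-byte vector compares are unavailable, instead of calling
    // the library routine.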
846 
847     /// Return the value type to use for ISD::SETCC.
848     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
849                            EVT VT) const override;
850 
851     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
852                                       TargetLoweringOpt &TLO) const override;
853 
854     /// Determine which of the bits specified in Mask are known to be either
855     /// zero or one and return them in the KnownZero/KnownOne bitsets.
856     void computeKnownBitsForTargetNode(const SDValue Op,
857                                        KnownBits &Known,
858                                        const APInt &DemandedElts,
859                                        const SelectionDAG &DAG,
860                                        unsigned Depth = 0) const override;
861 
862     /// Determine the number of bits in the operation that are sign bits.
863     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
864                                              const APInt &DemandedElts,
865                                              const SelectionDAG &DAG,
866                                              unsigned Depth) const override;
867 
868     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
869                                                  const APInt &DemandedElts,
870                                                  APInt &KnownUndef,
871                                                  APInt &KnownZero,
872                                                  TargetLoweringOpt &TLO,
873                                                  unsigned Depth) const override;
874 
875     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
876                                            const APInt &DemandedBits,
877                                            const APInt &DemandedElts,
878                                            KnownBits &Known,
879                                            TargetLoweringOpt &TLO,
880                                            unsigned Depth) const override;
881 
882     SDValue unwrapAddress(SDValue N) const override;
883 
884     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
885 
886     bool ExpandInlineAsm(CallInst *CI) const override;
887 
888     ConstraintType getConstraintType(StringRef Constraint) const override;
889 
890     /// Examine constraint string and operand type and determine a weight value.
891     /// The operand object must already have been set up with the operand type.
892     ConstraintWeight
893       getSingleConstraintMatchWeight(AsmOperandInfo &info,
894                                      const char *constraint) const override;
895 
896     const char *LowerXConstraint(EVT ConstraintVT) const override;
897 
    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
901     void LowerAsmOperandForConstraint(SDValue Op,
902                                       std::string &Constraint,
903                                       std::vector<SDValue> &Ops,
904                                       SelectionDAG &DAG) const override;
905 
    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }
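    // For illustration (hypothetical user code, not from this file): the
    // constraint letters handled above originate from inline asm such as
    //   __asm__("pshufb %1, %0" : "+x"(Val) : "v"(Mask));
    // where "v" requests any SSE/AVX vector register and is mapped here to
    // InlineAsm::Constraint_v before instruction selection.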
918 
919     /// Given a physical register constraint
920     /// (e.g. {edx}), return the register number and the register class for the
921     /// register.  This should only be used for C_Register constraints.  On
922     /// error, this returns a register number of 0.
923     std::pair<unsigned, const TargetRegisterClass *>
924     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
925                                  StringRef Constraint, MVT VT) const override;
926 
927     /// Return true if the addressing mode represented
928     /// by AM is legal for this target, for a load/store of the specified type.
929     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
930                                Type *Ty, unsigned AS,
931                                Instruction *I = nullptr) const override;
932 
    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
937     bool isLegalICmpImmediate(int64_t Imm) const override;
938 
    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
943     bool isLegalAddImmediate(int64_t Imm) const override;
944 
945     bool isLegalStoreImmediate(int64_t Imm) const override;
946 
947     /// Return the cost of the scaling factor used in the addressing
948     /// mode represented by AM for this target, for a load/store
949     /// of the specified type.
950     /// If the AM is supported, the return value must be >= 0.
951     /// If the AM is not supported, it returns a negative value.
952     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
953                              unsigned AS) const override;
954 
955     bool isVectorShiftByScalarCheap(Type *Ty) const override;
956 
    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
960     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
961     bool isTruncateFree(EVT VT1, EVT VT2) const override;
962 
963     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
964 
    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
973     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
974     bool isZExtFree(EVT VT1, EVT VT2) const override;
975     bool isZExtFree(SDValue Val, EVT VT2) const override;
976 
977     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
978     /// extend node) is profitable.
979     bool isVectorLoadExtDesirable(SDValue) const override;
980 
981     /// Return true if an FMA operation is faster than a pair of fmul and fadd
982     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
983     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
984     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
985 
986     /// Return true if it's profitable to narrow
987     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
988     /// from i32 to i8 but not from i32 to i16.
989     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
990 
991     /// Given an intrinsic, checks if on the target the intrinsic will need to map
992     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
993     /// true and stores the intrinsic information into the IntrinsicInfo that was
994     /// passed to the function.
995     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
996                             MachineFunction &MF,
997                             unsigned Intrinsic) const override;
998 
999     /// Returns true if the target can instruction select the
1000     /// specified FP immediate natively. If false, the legalizer will
1001     /// materialize the FP immediate as a load from a constant pool.
1002     bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
1003 
1004     /// Targets can use this to indicate that they only support *some*
1005     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1006     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1007     /// be legal.
1008     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1009 
1010     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1011     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1012     /// constant pool entry.
1013     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1014 
1015     /// Returns true if lowering to a jump table is allowed.
1016     bool areJTsAllowed(const Function *Fn) const override;
1017 
1018     /// If true, then instruction selection should
1019     /// seek to shrink the FP constant of the specified type to a smaller type
1020     /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
      // expensive than a straight movsd. On the other hand, it's important to
      // shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }
1027 
1028     /// Return true if we believe it is correct and profitable to reduce the
1029     /// load node to a smaller type.
1030     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1031                                EVT NewVT) const override;
1032 
1033     /// Return true if the specified scalar FP type is computed in an SSE
1034     /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 needs SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 needs SSE1
    }
1039 
1040     /// Returns true if it is beneficial to convert a load of a constant
1041     /// to just the constant itself.
1042     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1043                                            Type *Ty) const override;
1044 
1045     bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1046 
1047     bool convertSelectOfConstantsToMath(EVT VT) const override;
1048 
1049     bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1050 
1051     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1052                                   bool IsSigned) const override;
1053 
1054     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1055     /// with this index.
1056     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1057                                  unsigned Index) const override;
1058 
1059     /// Scalar ops always have equal or better analysis/performance/power than
1060     /// the vector equivalent, so this always makes sense if the scalar op is
1061     /// supported.
    bool shouldScalarizeBinop(SDValue) const override;
1063 
1064     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1065                                       unsigned AddrSpace) const override {
1066       // If we can replace more than 2 scalar stores, there will be a reduction
1067       // in instructions even after we add a vector constant load.
1068       return NumElem > 2;
1069     }
1070 
1071     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1072 
    /// Intel processors have a unified instruction and data cache.
    const char * getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }
1077 
1078     unsigned getRegisterByName(const char* RegName, EVT VT,
1079                                SelectionDAG &DAG) const override;
1080 
1081     /// If a physical register, this returns the register that receives the
1082     /// exception address on entry to an EH pad.
1083     unsigned
1084     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1085 
1086     /// If a physical register, this returns the register that receives the
1087     /// exception typeid on entry to a landing pad.
1088     unsigned
1089     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1090 
1091     virtual bool needsFixedCatchObjects() const override;
1092 
1093     /// This method returns a target specific FastISel object,
1094     /// or null if the target does not support "fast" ISel.
1095     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1096                              const TargetLibraryInfo *libInfo) const override;
1097 
1098     /// If the target has a standard location for the stack protector cookie,
1099     /// returns the address of that location. Otherwise, returns nullptr.
1100     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1101 
1102     bool useLoadStackGuardNode() const override;
1103     bool useStackGuardXorFP() const override;
1104     void insertSSPDeclarations(Module &M) const override;
1105     Value *getSDagStackGuard(const Module &M) const override;
1106     Value *getSSPStackGuardCheck(const Module &M) const override;
1107     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1108                                 const SDLoc &DL) const override;
1109 
1110 
    /// Return the location where the target stores the SafeStack pointer: a
    /// fixed offset in some non-standard address space, with the address space
    /// and offset chosen as appropriate for the target.
1114     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1115 
1116     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1117                       SelectionDAG &DAG) const;
1118 
1119     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1120 
1121     /// Customize the preferred legalization strategy for certain types.
1122     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1123 
1124     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1125                                       EVT VT) const override;
1126 
1127     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1128                                            CallingConv::ID CC,
1129                                            EVT VT) const override;
1130 
1131     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1132 
1133     bool supportSwiftError() const override;
1134 
1135     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1136 
    bool hasVectorBlend() const override { return true; }
1138 
    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1140 
1141     /// Lower interleaved load(s) into target specific
1142     /// instructions/intrinsics.
1143     bool lowerInterleavedLoad(LoadInst *LI,
1144                               ArrayRef<ShuffleVectorInst *> Shuffles,
1145                               ArrayRef<unsigned> Indices,
1146                               unsigned Factor) const override;
1147 
1148     /// Lower interleaved store(s) into target specific
1149     /// instructions/intrinsics.
1150     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1151                                unsigned Factor) const override;
1152 
1153     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1154                                    SDValue Addr, SelectionDAG &DAG)
1155                                    const override;
1156 
1157   protected:
1158     std::pair<const TargetRegisterClass *, uint8_t>
1159     findRepresentativeClass(const TargetRegisterInfo *TRI,
1160                             MVT VT) const override;
1161 
1162   private:
1163     /// Keep a reference to the X86Subtarget around so that we can
1164     /// make the right decision when generating code for different targets.
1165     const X86Subtarget &Subtarget;
1166 
1167     /// Select between SSE or x87 floating point ops.
1168     /// When SSE is available, use it for f32 operations.
1169     /// When SSE2 is available, use it for f64 operations.
1170     bool X86ScalarSSEf32;
1171     bool X86ScalarSSEf64;
1172 
1173     /// A list of legal FP immediates.
1174     std::vector<APFloat> LegalFPImmediates;
1175 
1176     /// Indicate that this x86 target can instruction
1177     /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat& Imm) {
      LegalFPImmediates.push_back(Imm);
    }
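    // A sketch of typical usage (not a quote from the X86 constructor): FP
    // immediates that can be materialized without a constant-pool load are
    // registered like
    //   addLegalFPImmediate(APFloat(+0.0)); // e.g. fldz or xorps
    //   addLegalFPImmediate(APFloat(+1.0)); // e.g. fld1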
1181 
1182     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1183                             CallingConv::ID CallConv, bool isVarArg,
1184                             const SmallVectorImpl<ISD::InputArg> &Ins,
1185                             const SDLoc &dl, SelectionDAG &DAG,
1186                             SmallVectorImpl<SDValue> &InVals,
1187                             uint32_t *RegMask) const;
1188     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1189                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1190                              const SDLoc &dl, SelectionDAG &DAG,
1191                              const CCValAssign &VA, MachineFrameInfo &MFI,
1192                              unsigned i) const;
1193     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1194                              const SDLoc &dl, SelectionDAG &DAG,
1195                              const CCValAssign &VA,
1196                              ISD::ArgFlagsTy Flags) const;
1197 
1198     // Call lowering helpers.
1199 
1200     /// Check whether the call is eligible for tail call optimization. Targets
1201     /// that want to do tail call optimization should implement this function.
1202     bool IsEligibleForTailCallOptimization(SDValue Callee,
1203                                            CallingConv::ID CalleeCC,
1204                                            bool isVarArg,
1205                                            bool isCalleeStructRet,
1206                                            bool isCallerStructRet,
1207                                            Type *RetTy,
1208                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1209                                     const SmallVectorImpl<SDValue> &OutVals,
1210                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1211                                            SelectionDAG& DAG) const;
1212     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1213                                     SDValue Chain, bool IsTailCall,
1214                                     bool Is64Bit, int FPDiff,
1215                                     const SDLoc &dl) const;
1216 
1217     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1218                                          SelectionDAG &DAG) const;
1219 
1220     unsigned getAddressSpace(void) const;
1221 
1222     std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1223                                                bool isSigned,
1224                                                bool isReplace) const;
1225 
1226     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1227     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1228     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1229     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1230 
1231     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1232                                   const unsigned char OpFlags = 0) const;
1233     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1234     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1235     SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1236                                int64_t Offset, SelectionDAG &DAG) const;
1237     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1238     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1239     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1240 
1241     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1242     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1243     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1244     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1245     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1246     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1247     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1248     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1249     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1250     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1251     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1252     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1253     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1254     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1255     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1256     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1257     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1258     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1259     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1260     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1261     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1262     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1263     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1264     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1265     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1266     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1267 
1268     SDValue
1269     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1270                          const SmallVectorImpl<ISD::InputArg> &Ins,
1271                          const SDLoc &dl, SelectionDAG &DAG,
1272                          SmallVectorImpl<SDValue> &InVals) const override;
1273     SDValue LowerCall(CallLoweringInfo &CLI,
1274                       SmallVectorImpl<SDValue> &InVals) const override;
1275 
1276     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1277                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1278                         const SmallVectorImpl<SDValue> &OutVals,
1279                         const SDLoc &dl, SelectionDAG &DAG) const override;
1280 
    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
1285     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1286     void insertCopiesSplitCSR(
1287       MachineBasicBlock *Entry,
1288       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1289 
1290     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1291 
1292     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1293 
1294     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1295                             ISD::NodeType ExtendKind) const override;
1296 
1297     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1298                         bool isVarArg,
1299                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1300                         LLVMContext &Context) const override;
1301 
1302     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1303 
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    /// Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
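  // Operand layout used by these nodes (as the accessors below reflect):
  // chain (0), stored value (1) and base pointer (2).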
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
        N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
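  // Operand layout used by these nodes (as the accessors below reflect):
  // chain (0), stored value (1), base pointer (2) and mask (3).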
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getValue()   const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }
    const SDValue &getMask()    const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
        N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
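  // For example, truncating a v8i32 value to a v8i16 memory type with signed
  // saturation clamps each element to [-32768, 32767] before it is stored.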
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
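  // For example, truncating a v8i32 value to a v8i16 memory type with unsigned
  // saturation clamps each element to [0, 65535] before it is stored.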
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                            EVT MemVT, MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT,
                             MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                             EVT MemVT, MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT,
                             MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
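  // Operand layout used by these nodes (as the accessors below and in the
  // derived classes reflect): chain (0), pass-thru or stored value (1),
  // mask (2), base pointer (3), index (4) and scale (5).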
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
        : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };
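
  // Purely illustrative sketch: the classof() implementations above let these
  // nodes be matched with the usual SelectionDAG RTTI helpers, e.g.
  //
  //   if (auto *GS = dyn_cast<X86MaskedGatherScatterSDNode>(N))
  //     SDValue Index = GS->getIndex();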

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                       MMO) {}

    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
                                       MMO) {}

    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
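  /// As a purely illustrative example: for MVT::v4i32 with Lo == true this
  /// produces {0, 4, 1, 5} (or {0, 0, 1, 1} when Unary is true), and with
  /// Lo == false it produces {2, 6, 3, 7} (or {2, 2, 3, 3} when Unary is true).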
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }

  /// Helper function to scale a shuffle or target shuffle mask, replacing each
  /// mask index with the scaled sequential indices for an equivalent narrowed
  /// mask. This is the reverse process to canWidenShuffleElements, but can
  /// always succeed.
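  /// As a purely illustrative example: scaling the mask {0, -1, 3} by a factor
  /// of 2 yields {0, 1, -1, -1, 6, 7}; each defined index M becomes the pair
  /// {2*M, 2*M+1} and sentinel values are simply repeated.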
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    int NumElts = Mask.size();
    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values across all of the scaled positions for this
      // element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Otherwise widen the mask element into Scale sequential indices.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H