1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
19 
20 namespace llvm {
21   class X86Subtarget;
22   class X86TargetMachine;
23 
24   namespace X86ISD {
25     // X86 Specific DAG Nodes
26   enum NodeType : unsigned {
27     // Start the numbering where the builtin ops leave off.
28     FIRST_NUMBER = ISD::BUILTIN_OP_END,
29 
30     /// Bit scan forward.
31     BSF,
32     /// Bit scan reverse.
33     BSR,
34 
35     /// X86 funnel/double shift i16 instructions. These correspond to
36     /// X86::SHLDW and X86::SHRDW instructions which have different amt
37     /// modulo rules to generic funnel shifts.
38     /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39     FSHL,
40     FSHR,
41 
42     /// Bitwise logical AND of floating point values. This corresponds
43     /// to X86::ANDPS or X86::ANDPD.
44     FAND,
45 
46     /// Bitwise logical OR of floating point values. This corresponds
47     /// to X86::ORPS or X86::ORPD.
48     FOR,
49 
50     /// Bitwise logical XOR of floating point values. This corresponds
51     /// to X86::XORPS or X86::XORPD.
52     FXOR,
53 
54     /// Bitwise logical ANDNOT of floating point values. This
55     /// corresponds to X86::ANDNPS or X86::ANDNPD.
56     FANDN,
57 
58     /// These operations represent an abstract X86 call
59     /// instruction, which includes a bunch of information.  In particular the
60     /// operands of these nodes are:
61     ///
62     ///     #0 - The incoming token chain
63     ///     #1 - The callee
64     ///     #2 - The number of arg bytes the caller pushes on the stack.
65     ///     #3 - The number of arg bytes the callee pops off the stack.
66     ///     #4 - The value to pass in AL/AX/EAX (optional)
67     ///     #5 - The value to pass in DL/DX/EDX (optional)
68     ///
69     /// The result values of these nodes are:
70     ///
71     ///     #0 - The outgoing token chain
72     ///     #1 - The first register result value (optional)
73     ///     #2 - The second register result value (optional)
74     ///
75     CALL,
76 
77     /// Same as call except it adds the NoTrack prefix.
78     NT_CALL,
79 
80     // Pseudo for an ObjC call that gets emitted together with a special
81     // marker instruction.
82     CALL_RVMARKER,
83 
84     /// X86 compare and logical compare instructions.
85     CMP,
86     FCMP,
87     COMI,
88     UCOMI,
89 
90     /// X86 bit-test instructions.
91     BT,
92 
93     /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94     /// operand, usually produced by a CMP instruction.
95     SETCC,
96 
97     /// X86 Select
98     SELECTS,
99 
100     // Same as SETCC except it's materialized with a sbb and the value is all
101     // ones or all zeros.
102     SETCC_CARRY, // R = carry_bit ? ~0 : 0
103 
104     /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105     /// Operands are two FP values to compare; result is a mask of
106     /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
107     FSETCC,
108 
109     /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
110     /// a version with SAE.
111     FSETCCM,
112     FSETCCM_SAE,
113 
114     /// X86 conditional moves. Operand 0 and operand 1 are the two values
115     /// to select from. Operand 2 is the condition code, and operand 3 is the
116     /// flag operand produced by a CMP or TEST instruction.
117     CMOV,
118 
119     /// X86 conditional branches. Operand 0 is the chain operand, operand 1
120     /// is the block to branch if condition is true, operand 2 is the
121     /// condition code, and operand 3 is the flag operand produced by a CMP
122     /// or TEST instruction.
123     BRCOND,
124 
125     /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126     /// operand 1 is the target address.
127     NT_BRIND,
128 
129     /// Return with a glue operand. Operand 0 is the chain operand, operand
130     /// 1 is the number of bytes of stack to pop.
131     RET_GLUE,
132 
133     /// Return from interrupt. Operand 0 is the number of bytes to pop.
134     IRET,
135 
136     /// Repeat fill, corresponds to X86::REP_STOSx.
137     REP_STOS,
138 
139     /// Repeat move, corresponds to X86::REP_MOVSx.
140     REP_MOVS,
141 
142     /// On Darwin, this node represents the result of the popl
143     /// at function entry, used for PIC code.
144     GlobalBaseReg,
145 
146     /// A wrapper node for TargetConstantPool, TargetJumpTable,
147     /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148     /// MCSymbol and TargetBlockAddress.
149     Wrapper,
150 
151     /// Special wrapper used under X86-64 PIC mode for RIP
152     /// relative displacements.
153     WrapperRIP,
154 
155     /// Copies a 64-bit value from an MMX vector to the low word
156     /// of an XMM vector, with the high word zero filled.
157     MOVQ2DQ,
158 
159     /// Copies a 64-bit value from the low word of an XMM vector
160     /// to an MMX vector.
161     MOVDQ2Q,
162 
163     /// Copies a 32-bit value from the low word of an MMX
164     /// vector to a GPR.
165     MMX_MOVD2W,
166 
167     /// Copies a GPR into the low 32-bit word of an MMX vector
168     /// and zeroes out the high word.
169     MMX_MOVW2D,
170 
171     /// Extract an 8-bit value from a vector and zero extend it to
172     /// i32, corresponds to X86::PEXTRB.
173     PEXTRB,
174 
175     /// Extract a 16-bit value from a vector and zero extend it to
176     /// i32, corresponds to X86::PEXTRW.
177     PEXTRW,
178 
179     /// Insert any element of a 4 x float vector into any element
180     /// of a destination 4 x float vector.
181     INSERTPS,
182 
183     /// Insert the lower 8-bits of a 32-bit value to a vector,
184     /// corresponds to X86::PINSRB.
185     PINSRB,
186 
187     /// Insert the lower 16-bits of a 32-bit value to a vector,
188     /// corresponds to X86::PINSRW.
189     PINSRW,
190 
191     /// Shuffle 16 8-bit values within a vector.
192     PSHUFB,
193 
194     /// Compute Sum of Absolute Differences.
195     PSADBW,
196     /// Compute Double Block Packed Sum-Absolute-Differences
197     DBPSADBW,
198 
199     /// Bitwise Logical AND NOT of Packed FP values.
200     ANDNP,
201 
202     /// Blend where the selector is an immediate.
203     BLENDI,
204 
205     /// Dynamic (non-constant condition) vector blend where only the sign bits
206     /// of the condition elements are used. This is used to enforce that the
207     /// condition mask is not valid for generic VSELECT optimizations. This
208     /// is also used to implement the intrinsics.
209     /// Operands are in VSELECT order: MASK, TRUE, FALSE
210     BLENDV,
211 
212     /// Combined add and sub on an FP vector.
213     ADDSUB,
214 
215     // FP vector ops with rounding mode.
216     FADD_RND,
217     FADDS,
218     FADDS_RND,
219     FSUB_RND,
220     FSUBS,
221     FSUBS_RND,
222     FMUL_RND,
223     FMULS,
224     FMULS_RND,
225     FDIV_RND,
226     FDIVS,
227     FDIVS_RND,
228     FMAX_SAE,
229     FMAXS_SAE,
230     FMIN_SAE,
231     FMINS_SAE,
232     FSQRT_RND,
233     FSQRTS,
234     FSQRTS_RND,
235 
236     // FP vector get exponent.
237     FGETEXP,
238     FGETEXP_SAE,
239     FGETEXPS,
240     FGETEXPS_SAE,
241     // Extract Normalized Mantissas.
242     VGETMANT,
243     VGETMANT_SAE,
244     VGETMANTS,
245     VGETMANTS_SAE,
246     // FP Scale.
247     SCALEF,
248     SCALEF_RND,
249     SCALEFS,
250     SCALEFS_RND,
251 
252     /// Integer horizontal add/sub.
253     HADD,
254     HSUB,
255 
256     /// Floating point horizontal add/sub.
257     FHADD,
258     FHSUB,
259 
260     // Detect Conflicts Within a Vector
261     CONFLICT,
262 
263     /// Floating point max and min.
264     FMAX,
265     FMIN,
266 
267     /// Commutative FMIN and FMAX.
268     FMAXC,
269     FMINC,
270 
271     /// Scalar intrinsic floating point max and min.
272     FMAXS,
273     FMINS,
274 
275     /// Floating point reciprocal-sqrt and reciprocal approximation.
276     /// Note that these typically require refinement
277     /// in order to obtain suitable precision.
278     FRSQRT,
279     FRCP,
280 
281     // AVX-512 reciprocal approximations with a little more precision.
282     RSQRT14,
283     RSQRT14S,
284     RCP14,
285     RCP14S,
286 
287     // Thread Local Storage.
288     TLSADDR,
289 
290     // Thread Local Storage. A call to get the start address
291     // of the TLS block for the current module.
292     TLSBASEADDR,
293 
294     // Thread Local Storage.  When calling to an OS provided
295     // thunk at the address from an earlier relocation.
296     TLSCALL,
297 
298     // Exception Handling helpers.
299     EH_RETURN,
300 
301     // SjLj exception handling setjmp.
302     EH_SJLJ_SETJMP,
303 
304     // SjLj exception handling longjmp.
305     EH_SJLJ_LONGJMP,
306 
307     // SjLj exception handling dispatch.
308     EH_SJLJ_SETUP_DISPATCH,
309 
310     /// Tail call return. See X86TargetLowering::LowerCall for
311     /// the list of operands.
312     TC_RETURN,
313 
314     // Vector move to low scalar and zero higher vector elements.
315     VZEXT_MOVL,
316 
317     // Vector integer truncate.
318     VTRUNC,
319     // Vector integer truncate with unsigned/signed saturation.
320     VTRUNCUS,
321     VTRUNCS,
322 
323     // Masked version of the above. Used when less than a 128-bit result is
324     // produced since the mask only applies to the lower elements and can't
325     // be represented by a select.
326     // SRC, PASSTHRU, MASK
327     VMTRUNC,
328     VMTRUNCUS,
329     VMTRUNCS,
330 
331     // Vector FP extend.
332     VFPEXT,
333     VFPEXT_SAE,
334     VFPEXTS,
335     VFPEXTS_SAE,
336 
337     // Vector FP round.
338     VFPROUND,
339     VFPROUND_RND,
340     VFPROUNDS,
341     VFPROUNDS_RND,
342 
343     // Masked version of above. Used for v2f64->v4f32.
344     // SRC, PASSTHRU, MASK
345     VMFPROUND,
346 
347     // 128-bit vector logical left / right shift
348     VSHLDQ,
349     VSRLDQ,
350 
351     // Vector shift elements
352     VSHL,
353     VSRL,
354     VSRA,
355 
356     // Vector variable shift
357     VSHLV,
358     VSRLV,
359     VSRAV,
360 
361     // Vector shift elements by immediate
362     VSHLI,
363     VSRLI,
364     VSRAI,
365 
366     // Shifts of mask registers.
367     KSHIFTL,
368     KSHIFTR,
369 
370     // Bit rotate by immediate
371     VROTLI,
372     VROTRI,
373 
374     // Vector packed double/float comparison.
375     CMPP,
376 
377     // Vector integer comparisons.
378     PCMPEQ,
379     PCMPGT,
380 
381     // v8i16 Horizontal minimum and position.
382     PHMINPOS,
383 
384     MULTISHIFT,
385 
386     /// Vector comparison generating mask bits for fp and
387     /// integer signed and unsigned data types.
388     CMPM,
389     // Vector mask comparison generating mask bits for FP values.
390     CMPMM,
391     // Vector mask comparison with SAE for FP values.
392     CMPMM_SAE,
393 
394     // Arithmetic operations with FLAGS results.
395     ADD,
396     SUB,
397     ADC,
398     SBB,
399     SMUL,
400     UMUL,
401     OR,
402     XOR,
403     AND,
404 
405     // Bit field extract.
406     BEXTR,
407     BEXTRI,
408 
409     // Zero High Bits Starting with Specified Bit Position.
410     BZHI,
411 
412     // Parallel extract and deposit.
413     PDEP,
414     PEXT,
415 
416     // X86-specific multiply by immediate.
417     MUL_IMM,
418 
419     // Vector sign bit extraction.
420     MOVMSK,
421 
422     // Vector bitwise comparisons.
423     PTEST,
424 
425     // Vector packed fp sign bitwise comparisons.
426     TESTP,
427 
428     // OR/AND test for masks.
429     KORTEST,
430     KTEST,
431 
432     // ADD for masks.
433     KADD,
434 
435     // Several flavors of instructions with vector shuffle behaviors.
436     // Saturated signed/unsigned packing.
437     PACKSS,
438     PACKUS,
439     // Intra-lane alignr.
440     PALIGNR,
441     // AVX512 inter-lane alignr.
442     VALIGN,
443     PSHUFD,
444     PSHUFHW,
445     PSHUFLW,
446     SHUFP,
447     // VBMI2 Concat & Shift.
448     VSHLD,
449     VSHRD,
450     VSHLDV,
451     VSHRDV,
452     // Shuffle Packed Values at 128-bit granularity.
453     SHUF128,
454     MOVDDUP,
455     MOVSHDUP,
456     MOVSLDUP,
457     MOVLHPS,
458     MOVHLPS,
459     MOVSD,
460     MOVSS,
461     MOVSH,
462     UNPCKL,
463     UNPCKH,
464     VPERMILPV,
465     VPERMILPI,
466     VPERMI,
467     VPERM2X128,
468 
469     // Variable Permute (VPERM).
470     // Res = VPERMV MaskV, V0
471     VPERMV,
472 
473     // 3-op Variable Permute (VPERMT2).
474     // Res = VPERMV3 V0, MaskV, V1
475     VPERMV3,
476 
477     // Bitwise ternary logic.
478     VPTERNLOG,
479     // Fix Up Special Packed Float32/64 values.
480     VFIXUPIMM,
481     VFIXUPIMM_SAE,
482     VFIXUPIMMS,
483     VFIXUPIMMS_SAE,
484     // Range Restriction Calculation For Packed Pairs of Float32/64 values.
485     VRANGE,
486     VRANGE_SAE,
487     VRANGES,
488     VRANGES_SAE,
489     // Reduce - Perform Reduction Transformation on scalar/packed FP.
490     VREDUCE,
491     VREDUCE_SAE,
492     VREDUCES,
493     VREDUCES_SAE,
494     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
495     // Also used by the legacy (V)ROUND intrinsics where we mask out the
496     // scaling part of the immediate.
497     VRNDSCALE,
498     VRNDSCALE_SAE,
499     VRNDSCALES,
500     VRNDSCALES_SAE,
501     // Tests the types of FP values for packed types.
502     VFPCLASS,
503     // Tests the types of FP values for scalar types.
504     VFPCLASSS,
505 
506     // Broadcast (splat) scalar or element 0 of a vector. If the operand is
507     // a vector, this node may change the vector length as part of the splat.
508     VBROADCAST,
509     // Broadcast mask to vector.
510     VBROADCASTM,
511 
512     /// SSE4A Extraction and Insertion.
513     EXTRQI,
514     INSERTQI,
515 
516     // XOP arithmetic/logical shifts.
517     VPSHA,
518     VPSHL,
519     // XOP signed/unsigned integer comparisons.
520     VPCOM,
521     VPCOMU,
522     // XOP packed permute bytes.
523     VPPERM,
524     // XOP two source permutation.
525     VPERMIL2,
526 
527     // Vector multiply packed unsigned doubleword integers.
528     PMULUDQ,
529     // Vector multiply packed signed doubleword integers.
530     PMULDQ,
531     // Vector Multiply Packed Unsigned Integers with Round and Scale.
532     MULHRS, // NOTE(review): PMULHRSW is presumably signed, not unsigned; confirm.
533 
534     // Multiply and Add Packed Integers.
535     VPMADDUBSW,
536     VPMADDWD,
537 
538     // AVX512IFMA multiply and add.
539     // NOTE: These are different than the instruction and perform
540     // op0 x op1 + op2.
541     VPMADD52L,
542     VPMADD52H,
543 
544     // VNNI
545     VPDPBUSD,
546     VPDPBUSDS,
547     VPDPWSSD,
548     VPDPWSSDS,
549 
550     // FMA nodes.
551     // We use the target independent ISD::FMA for the non-inverted case.
552     FNMADD,
553     FMSUB,
554     FNMSUB,
555     FMADDSUB,
556     FMSUBADD,
557 
558     // FMA with rounding mode.
559     FMADD_RND,
560     FNMADD_RND,
561     FMSUB_RND,
562     FNMSUB_RND,
563     FMADDSUB_RND,
564     FMSUBADD_RND,
565 
566     // AVX512-FP16 complex addition and multiplication.
567     VFMADDC,
568     VFMADDC_RND,
569     VFCMADDC,
570     VFCMADDC_RND,
571 
572     VFMULC,
573     VFMULC_RND,
574     VFCMULC,
575     VFCMULC_RND,
576 
577     VFMADDCSH,
578     VFMADDCSH_RND,
579     VFCMADDCSH,
580     VFCMADDCSH_RND,
581 
582     VFMULCSH,
583     VFMULCSH_RND,
584     VFCMULCSH,
585     VFCMULCSH_RND,
586 
587     VPDPBSUD,
588     VPDPBSUDS,
589     VPDPBUUD,
590     VPDPBUUDS,
591     VPDPBSSD,
592     VPDPBSSDS,
593 
594     // Compress and expand.
595     COMPRESS,
596     EXPAND,
597 
598     // Bits shuffle
599     VPSHUFBITQMB,
600 
601     // Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
602     SINT_TO_FP_RND,
603     UINT_TO_FP_RND,
604     SCALAR_SINT_TO_FP,
605     SCALAR_UINT_TO_FP,
606     SCALAR_SINT_TO_FP_RND,
607     SCALAR_UINT_TO_FP_RND,
608 
609     // Vector float/double to signed/unsigned integer.
610     CVTP2SI,
611     CVTP2UI,
612     CVTP2SI_RND,
613     CVTP2UI_RND,
614     // Scalar float/double to signed/unsigned integer.
615     CVTS2SI,
616     CVTS2UI,
617     CVTS2SI_RND,
618     CVTS2UI_RND,
619 
620     // Vector float/double to signed/unsigned integer with truncation.
621     CVTTP2SI,
622     CVTTP2UI,
623     CVTTP2SI_SAE,
624     CVTTP2UI_SAE,
625     // Scalar float/double to signed/unsigned integer with truncation.
626     CVTTS2SI,
627     CVTTS2UI,
628     CVTTS2SI_SAE,
629     CVTTS2UI_SAE,
630 
631     // Vector signed/unsigned integer to float/double.
632     CVTSI2P,
633     CVTUI2P,
634 
635     // Masked versions of above. Used for v2f64->v4f32.
636     // SRC, PASSTHRU, MASK
637     MCVTP2SI,
638     MCVTP2UI,
639     MCVTTP2SI,
640     MCVTTP2UI,
641     MCVTSI2P,
642     MCVTUI2P,
643 
644     // Vector float to bfloat16.
645     // Convert TWO packed single data to one packed BF16 data
646     CVTNE2PS2BF16,
647     // Convert packed single data to packed BF16 data
648     CVTNEPS2BF16,
649     // Masked version of above.
650     // SRC, PASSTHRU, MASK
651     MCVTNEPS2BF16,
652 
653     // Dot product of BF16 pairs accumulated into
654     // packed single precision.
655     DPBF16PS,
656 
657     // A stack checking function call. On Windows it's _chkstk call.
658     DYN_ALLOCA,
659 
660     // For allocating variable amounts of stack space when using
661     // segmented stacks. Check if the current stacklet has enough space, and
662     // falls back to heap allocation if not.
663     SEG_ALLOCA,
664 
665     // For allocating stack space when using stack clash protector.
666     // Allocation is performed by block, and each block is probed.
667     PROBED_ALLOCA,
668 
669     // Memory barriers.
670     MFENCE,
671 
672     // Get a random integer and indicate whether it is valid in CF.
673     RDRAND,
674 
675     // Get a NIST SP800-90B & C compliant random integer and
676     // indicate whether it is valid in CF.
677     RDSEED,
678 
679     // Protection keys
680     // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
681     // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
682     // value for ECX.
683     RDPKRU,
684     WRPKRU,
685 
686     // SSE42 string comparisons.
687     // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
688     // will emit one or two instructions based on which results are used. If
689     // flags and index/mask are both used, this allows us to use a single
690     // instruction since we won't have to pick an opcode for flags. Instead we
691     // can rely on the DAG to CSE everything and decide at isel.
692     PCMPISTR,
693     PCMPESTR,
694 
695     // Test if in transactional execution.
696     XTEST,
697 
698     // ERI instructions.
699     RSQRT28,
700     RSQRT28_SAE,
701     RSQRT28S,
702     RSQRT28S_SAE,
703     RCP28,
704     RCP28_SAE,
705     RCP28S,
706     RCP28S_SAE,
707     EXP2,
708     EXP2_SAE,
709 
710     // Conversions between float and half-float.
711     CVTPS2PH,
712     CVTPS2PH_SAE,
713     CVTPH2PS,
714     CVTPH2PS_SAE,
715 
716     // Masked version of above.
717     // SRC, RND, PASSTHRU, MASK
718     MCVTPS2PH,
719     MCVTPS2PH_SAE,
720 
721     // Galois Field Arithmetic Instructions
722     GF2P8AFFINEINVQB,
723     GF2P8AFFINEQB,
724     GF2P8MULB,
725 
726     // LWP insert record.
727     LWPINS,
728 
729     // User level wait
730     UMWAIT,
731     TPAUSE,
732 
733     // Enqueue Stores Instructions
734     ENQCMD,
735     ENQCMDS,
736 
737     // For avx512-vp2intersect
738     VP2INTERSECT,
739 
740     // User level interrupts - testui
741     TESTUI,
742 
743     // Perform an FP80 add after changing precision control in FPCW.
744     FP80_ADD,
745 
746     /// X86 strict FP compare instructions.
747     STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
748     STRICT_FCMPS,
749 
750     // Vector packed double/float comparison.
751     STRICT_CMPP,
752 
753     /// Vector comparison generating mask bits for fp and
754     /// integer signed and unsigned data types.
755     STRICT_CMPM,
756 
757     // Vector float/double to signed/unsigned integer with truncation.
758     STRICT_CVTTP2SI,
759     STRICT_CVTTP2UI,
760 
761     // Vector FP extend.
762     STRICT_VFPEXT,
763 
764     // Vector FP round.
765     STRICT_VFPROUND,
766 
767     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
768     // Also used by the legacy (V)ROUND intrinsics where we mask out the
769     // scaling part of the immediate.
770     STRICT_VRNDSCALE,
771 
772     // Vector signed/unsigned integer to float/double.
773     STRICT_CVTSI2P,
774     STRICT_CVTUI2P,
775 
776     // Strict FMA nodes.
777     STRICT_FNMADD,
778     STRICT_FMSUB,
779     STRICT_FNMSUB,
780 
781     // Conversions between float and half-float.
782     STRICT_CVTPS2PH,
783     STRICT_CVTPH2PS,
784 
785     // Perform an FP80 add after changing precision control in FPCW.
786     STRICT_FP80_ADD,
787 
788     // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
789     // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
790 
791     // Compare and swap.
792     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
793     LCMPXCHG8_DAG,
794     LCMPXCHG16_DAG,
795     LCMPXCHG16_SAVE_RBX_DAG,
796 
797     /// LOCK-prefixed arithmetic read-modify-write instructions.
798     /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
799     LADD,
800     LSUB,
801     LOR,
802     LXOR,
803     LAND,
804     LBTS,
805     LBTC,
806     LBTR,
807     LBTS_RM,
808     LBTC_RM,
809     LBTR_RM,
810 
811     /// RAO arithmetic instructions.
812     /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
813     AADD,
814     AOR,
815     AXOR,
816     AAND,
817 
818     // Load, scalar_to_vector, and zero extend.
819     VZEXT_LOAD,
820 
821     // extract_vector_elt, store.
822     VEXTRACT_STORE,
823 
824     // scalar broadcast from memory.
825     VBROADCAST_LOAD,
826 
827     // subvector broadcast from memory.
828     SUBV_BROADCAST_LOAD,
829 
830     // Store FP control word into i16 memory.
831     FNSTCW16m,
832 
833     // Load FP control word from i16 memory.
834     FLDCW16m,
835 
836     // Store x87 FPU environment into memory.
837     FNSTENVm,
838 
839     // Load x87 FPU environment from memory.
840     FLDENVm,
841 
842     /// This instruction implements FP_TO_SINT with the
843     /// integer destination in memory and a FP reg source.  This corresponds
844     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
845     /// has two inputs (token chain and address) and two outputs (int value
846     /// and token chain). Memory VT specifies the type to store to.
847     FP_TO_INT_IN_MEM,
848 
849     /// This instruction implements SINT_TO_FP with the
850     /// integer source in memory and FP reg result.  This corresponds to the
851     /// X86::FILD*m instructions. It has two inputs (token chain and address)
852     /// and two outputs (FP value and token chain). The integer source type is
853     /// specified by the memory VT.
854     FILD,
855 
856     /// This instruction implements a fp->int store from FP stack
857     /// slots. This corresponds to the fist instruction. It takes a
858     /// chain operand, value to store, address, and glue. The memory VT
859     /// specifies the type to store as.
860     FIST,
861 
862     /// This instruction implements an extending load to FP stack slots.
863     /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
864     /// operand, and ptr to load from. The memory VT specifies the type to
865     /// load from.
866     FLD,
867 
868     /// This instruction implements a truncating store from FP stack
869     /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
870     /// chain operand, value to store, address, and glue. The memory VT
871     /// specifies the type to store as.
872     FST,
873 
874     /// These instructions grab the address of the next argument
875     /// from a va_list. (reads and modifies the va_list in memory)
876     VAARG_64,
877     VAARG_X32,
878 
879     // Vector truncating store with unsigned/signed saturation
880     VTRUNCSTOREUS,
881     VTRUNCSTORES,
882     // Vector truncating masked store with unsigned/signed saturation
883     VMTRUNCSTOREUS,
884     VMTRUNCSTORES,
885 
886     // X86 specific gather and scatter
887     MGATHER,
888     MSCATTER,
889 
890     // Key locker nodes that produce flags.
891     AESENC128KL,
892     AESDEC128KL,
893     AESENC256KL,
894     AESDEC256KL,
895     AESENCWIDE128KL,
896     AESDECWIDE128KL,
897     AESENCWIDE256KL,
898     AESDECWIDE256KL,
899 
900     /// Compare and Add if Condition is Met. Compare value in operand 2 with
901     /// value in memory of operand 1. If condition of operand 4 is met, add
902     /// value operand 3 to m32 and write new value in operand 1. Operand 2 is
903     /// always updated with the original value from operand 1.
904     CMPCCXADD,
905 
906     // Save xmm argument registers to the stack, according to %al. An operator
907     // is needed so that this can be expanded with control flow.
908     VASTART_SAVE_XMM_REGS,
909 
910     // WARNING: Do not add anything at the end unless you want the node to
911     // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
912     // opcodes will be treated as target memory ops!
913   };
914   } // end namespace X86ISD
915 
916   namespace X86 {
917     /// Current rounding mode is represented in bits 11:10 of FPSR. These
918     /// values are the same as the corresponding rounding-mode constants
919     /// used in glibc.
920     enum RoundingMode {
921       rmToNearest   = 0,        // FE_TONEAREST  (0x000)
922       rmDownward    = 1 << 10,  // FE_DOWNWARD   (0x400)
923       rmUpward      = 2 << 10,  // FE_UPWARD     (0x800)
924       rmTowardZero  = 3 << 10,  // FE_TOWARDZERO (0xC00)
925       rmMask        = 3 << 10   // Bit mask selecting rounding mode (bits 11:10)
926     };
927   } // end namespace X86
928 
929   /// Define some predicates that are used for node matching.
930   namespace X86 {
931     /// Returns true if Elt is a constant zero or floating point constant +0.0.
932     bool isZeroNode(SDValue Elt);
933 
934     /// Returns true if the given offset can fit into the displacement
935     /// field of the instruction.
936     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
937                                       bool hasSymbolicDisplacement);
938 
939     /// Determines whether the callee is required to pop its
940     /// own arguments. Callee pop is necessary to support tail calls.
941     bool isCalleePop(CallingConv::ID CallingConv,
942                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
943 
944     /// If Op is a constant whose elements are all the same constant or
945     /// undefined, return true and return the constant value in \p SplatVal.
946     /// If we have undef bits that don't cover an entire element, we treat these
947     /// as zero if AllowPartialUndefs is set, else we fail and return false.
948     bool isConstantSplat(SDValue Op, APInt &SplatVal,
949                          bool AllowPartialUndefs = true);
950 
951     /// Check if Op is a load operation that could be folded into some other x86
952     /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
953     bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
954                      bool AssumeSingleUse = false);
955 
956     /// Check if Op is a load operation that could be folded into a vector splat
957     /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
958     bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
959                                          const X86Subtarget &Subtarget,
960                                          bool AssumeSingleUse = false);
961 
962     /// Check if Op is a value that could be used to fold a store into some
963     /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
964     bool mayFoldIntoStore(SDValue Op);
965 
966     /// Check if Op is an operation that could be folded into a zero extend x86
967     /// instruction.
968     bool mayFoldIntoZeroExtend(SDValue Op);
969   } // end namespace X86
970 
971   //===--------------------------------------------------------------------===//
972   //  X86 Implementation of the TargetLowering interface
973   class X86TargetLowering final : public TargetLowering {
974   public:
975     explicit X86TargetLowering(const X86TargetMachine &TM,
976                                const X86Subtarget &STI);
977 
978     unsigned getJumpTableEncoding() const override;
979     bool useSoftFloat() const override;
980 
981     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
982                                ArgListTy &Args) const override;
983 
984     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
985       return MVT::i8; // Always i8: neither the DataLayout nor VT is consulted.
986     }
987 
988     const MCExpr *
989     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
990                               const MachineBasicBlock *MBB, unsigned uid,
991                               MCContext &Ctx) const override;
992 
993     /// Returns relocation base for the given PIC jumptable.
994     SDValue getPICJumpTableRelocBase(SDValue Table,
995                                      SelectionDAG &DAG) const override;
996     const MCExpr *
997     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
998                                  unsigned JTI, MCContext &Ctx) const override;
999 
1000     /// Return the desired alignment for ByVal aggregate
1001     /// function arguments in the caller parameter area. For X86, aggregates
1002     /// that contain SSE vectors are placed at 16-byte boundaries while the
1003     /// rest are at 4-byte boundaries.
1004     uint64_t getByValTypeAlignment(Type *Ty,
1005                                    const DataLayout &DL) const override;
1006 
1007     EVT getOptimalMemOpType(const MemOp &Op,
1008                             const AttributeList &FuncAttributes) const override;
1009 
1010     /// Returns true if it's safe to use load / store of the
1011     /// specified type to expand memcpy / memset inline. This is mostly true
1012     /// for all types except for some special cases. For example, on X86
1013     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1014     /// also does type conversion. Note the specified type doesn't have to be
1015     /// legal as the hook is used before type legalization.
1016     bool isSafeMemOpType(MVT VT) const override;
1017 
1018     bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1019 
1020     /// Returns true if the target allows unaligned memory accesses of the
1021     /// specified type. Returns whether it is "fast" in the last argument.
1022     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1023                                         MachineMemOperand::Flags Flags,
1024                                         unsigned *Fast) const override;
1025 
1026     /// This function returns true if the memory access is aligned or if the
1027     /// target allows this specific unaligned memory access. If the access is
1028     /// allowed, the optional final parameter returns a relative speed of the
1029     /// access (as defined by the target).
1030     bool allowsMemoryAccess(
1031         LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1032         Align Alignment,
1033         MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1034         unsigned *Fast = nullptr) const override;
1035 
1036     bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1037                             const MachineMemOperand &MMO,
1038                             unsigned *Fast) const {
1039       return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1040                                 MMO.getAlign(), MMO.getFlags(), Fast);
1041     }
1042 
1043     /// Provide custom lowering hooks for some operations.
1044     ///
1045     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1046 
1047     /// Replace the results of node with an illegal result
1048     /// type with new values built out of custom code.
1049     ///
1050     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1051                             SelectionDAG &DAG) const override;
1052 
1053     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1054 
1055     bool preferABDSToABSWithNSW(EVT VT) const override;
1056 
1057     /// Return true if the target has native support for
1058     /// the specified value type and it is 'desirable' to use the type for the
1059     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1060     /// instruction encodings are longer and some i16 instructions are slow.
1061     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1062 
    /// Return true if it is beneficial for the DAG combiner to promote the
    /// specified node to a wider type; if so, the desired promotion type is
    /// returned in \p PVT. e.g. On x86 i16 is legal, but undesirable since
    /// i16 instruction encodings are longer and some i16 instructions are
    /// slow.
1067     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1068 
    /// Return preferred fold type, Abs if this is a vector, AddAnd if it's an
    /// integer, None otherwise.
1071     TargetLowering::AndOrSETCCFoldKind
1072     isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1073                                        const SDNode *SETCC0,
1074                                        const SDNode *SETCC1) const override;
1075 
    /// Return the newly negated expression if the cost is not expensive and
    /// set \p Cost to indicate whether it is cheaper or neutral to do the
    /// negation.
1079     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1080                                  bool LegalOperations, bool ForCodeSize,
1081                                  NegatibleCost &Cost,
1082                                  unsigned Depth) const override;
1083 
1084     MachineBasicBlock *
1085     EmitInstrWithCustomInserter(MachineInstr &MI,
1086                                 MachineBasicBlock *MBB) const override;
1087 
1088     /// This method returns the name of a target specific DAG node.
1089     const char *getTargetNodeName(unsigned Opcode) const override;
1090 
1091     /// Do not merge vector stores after legalization because that may conflict
1092     /// with x86-specific store splitting optimizations.
1093     bool mergeStoresAfterLegalization(EVT MemVT) const override {
1094       return !MemVT.isVector();
1095     }
1096 
1097     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1098                           const MachineFunction &MF) const override;
1099 
1100     bool isCheapToSpeculateCttz(Type *Ty) const override;
1101 
1102     bool isCheapToSpeculateCtlz(Type *Ty) const override;
1103 
1104     bool isCtlzFast() const override;
1105 
1106     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1107       // If the pair to store is a mixture of float and int values, we will
1108       // save two bitwise instructions and one float-to-int instruction and
1109       // increase one store instruction. There is potentially a more
1110       // significant benefit because it avoids the float->int domain switch
1111       // for input value. So It is more likely a win.
1112       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1113           (LTy.isInteger() && HTy.isFloatingPoint()))
1114         return true;
1115       // If the pair only contains int values, we will save two bitwise
1116       // instructions and increase one store instruction (costing one more
1117       // store buffer). Since the benefit is more blurred so we leave
1118       // such pair out until we get testcase to prove it is a win.
1119       return false;
1120     }
1121 
1122     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1123 
1124     bool hasAndNotCompare(SDValue Y) const override;
1125 
1126     bool hasAndNot(SDValue Y) const override;
1127 
1128     bool hasBitTest(SDValue X, SDValue Y) const override;
1129 
1130     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1131         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1132         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1133         SelectionDAG &DAG) const override;
1134 
1135     bool preferScalarizeSplat(SDNode *N) const override;
1136 
1137     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1138                                            CombineLevel Level) const override;
1139 
1140     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1141 
1142     bool
1143     shouldTransformSignedTruncationCheck(EVT XVT,
1144                                          unsigned KeptBits) const override {
1145       // For vectors, we don't have a preference..
1146       if (XVT.isVector())
1147         return false;
1148 
1149       auto VTIsOk = [](EVT VT) -> bool {
1150         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1151                VT == MVT::i64;
1152       };
1153 
1154       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1155       // XVT will be larger than KeptBitsVT.
1156       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1157       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1158     }
1159 
1160     ShiftLegalizationStrategy
1161     preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1162                                        unsigned ExpansionFactor) const override;
1163 
1164     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1165 
1166     bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1167       // Converting to sat variants holds little benefit on X86 as we will just
1168       // need to saturate the value back using fp arithmatic.
1169       return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1170     }
1171 
1172     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1173       return VT.isScalarInteger();
1174     }
1175 
1176     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1177     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1178 
1179     /// Return the value type to use for ISD::SETCC.
1180     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1181                            EVT VT) const override;
1182 
1183     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1184                                       const APInt &DemandedElts,
1185                                       TargetLoweringOpt &TLO) const override;
1186 
    /// Determine which bits of Op are known to be either zero or one and
    /// return them in Known.
1189     void computeKnownBitsForTargetNode(const SDValue Op,
1190                                        KnownBits &Known,
1191                                        const APInt &DemandedElts,
1192                                        const SelectionDAG &DAG,
1193                                        unsigned Depth = 0) const override;
1194 
1195     /// Determine the number of bits in the operation that are sign bits.
1196     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1197                                              const APInt &DemandedElts,
1198                                              const SelectionDAG &DAG,
1199                                              unsigned Depth) const override;
1200 
1201     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1202                                                  const APInt &DemandedElts,
1203                                                  APInt &KnownUndef,
1204                                                  APInt &KnownZero,
1205                                                  TargetLoweringOpt &TLO,
1206                                                  unsigned Depth) const override;
1207 
1208     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1209                                                     const APInt &DemandedElts,
1210                                                     unsigned MaskIndex,
1211                                                     TargetLoweringOpt &TLO,
1212                                                     unsigned Depth) const;
1213 
1214     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1215                                            const APInt &DemandedBits,
1216                                            const APInt &DemandedElts,
1217                                            KnownBits &Known,
1218                                            TargetLoweringOpt &TLO,
1219                                            unsigned Depth) const override;
1220 
1221     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1222         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1223         SelectionDAG &DAG, unsigned Depth) const override;
1224 
1225     bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1226         SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1227         bool PoisonOnly, unsigned Depth) const override;
1228 
1229     bool canCreateUndefOrPoisonForTargetNode(
1230         SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1231         bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1232 
1233     bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1234                                    APInt &UndefElts, const SelectionDAG &DAG,
1235                                    unsigned Depth) const override;
1236 
1237     bool isTargetCanonicalConstantNode(SDValue Op) const override {
1238       // Peek through bitcasts/extracts/inserts to see if we have a broadcast
1239       // vector from memory.
1240       while (Op.getOpcode() == ISD::BITCAST ||
1241              Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1242              (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1243               Op.getOperand(0).isUndef()))
1244         Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1245 
1246       return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1247              TargetLowering::isTargetCanonicalConstantNode(Op);
1248     }
1249 
1250     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1251 
1252     SDValue unwrapAddress(SDValue N) const override;
1253 
1254     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1255 
1256     bool ExpandInlineAsm(CallInst *CI) const override;
1257 
1258     ConstraintType getConstraintType(StringRef Constraint) const override;
1259 
1260     /// Examine constraint string and operand type and determine a weight value.
1261     /// The operand object must already have been set up with the operand type.
1262     ConstraintWeight
1263       getSingleConstraintMatchWeight(AsmOperandInfo &info,
1264                                      const char *constraint) const override;
1265 
1266     const char *LowerXConstraint(EVT ConstraintVT) const override;
1267 
    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops.
1271     void LowerAsmOperandForConstraint(SDValue Op,
1272                                       std::string &Constraint,
1273                                       std::vector<SDValue> &Ops,
1274                                       SelectionDAG &DAG) const override;
1275 
1276     unsigned
1277     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1278       if (ConstraintCode == "v")
1279         return InlineAsm::Constraint_v;
1280       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1281     }
1282 
1283     /// Handle Lowering flag assembly outputs.
1284     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1285                                         const SDLoc &DL,
1286                                         const AsmOperandInfo &Constraint,
1287                                         SelectionDAG &DAG) const override;
1288 
1289     /// Given a physical register constraint
1290     /// (e.g. {edx}), return the register number and the register class for the
1291     /// register.  This should only be used for C_Register constraints.  On
1292     /// error, this returns a register number of 0.
1293     std::pair<unsigned, const TargetRegisterClass *>
1294     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1295                                  StringRef Constraint, MVT VT) const override;
1296 
1297     /// Return true if the addressing mode represented
1298     /// by AM is legal for this target, for a load/store of the specified type.
1299     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1300                                Type *Ty, unsigned AS,
1301                                Instruction *I = nullptr) const override;
1302 
1303     /// Return true if the specified immediate is legal
1304     /// icmp immediate, that is the target has icmp instructions which can
1305     /// compare a register against the immediate without having to materialize
1306     /// the immediate into a register.
1307     bool isLegalICmpImmediate(int64_t Imm) const override;
1308 
1309     /// Return true if the specified immediate is legal
1310     /// add immediate, that is the target has add instructions which can
1311     /// add a register and the immediate without having to materialize
1312     /// the immediate into a register.
1313     bool isLegalAddImmediate(int64_t Imm) const override;
1314 
1315     bool isLegalStoreImmediate(int64_t Imm) const override;
1316 
1317     /// This is used to enable splatted operand transforms for vector shifts
1318     /// and vector funnel shifts.
1319     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1320 
1321     /// Add x86-specific opcodes to the default list.
1322     bool isBinOp(unsigned Opcode) const override;
1323 
1324     /// Returns true if the opcode is a commutative binary operation.
1325     bool isCommutativeBinOp(unsigned Opcode) const override;
1326 
1327     /// Return true if it's free to truncate a value of
1328     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1329     /// register EAX to i16 by referencing its sub-register AX.
1330     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1331     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1332 
1333     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1334 
    /// Return true if any actual instruction that defines a value of type Ty1
    /// implicitly zero-extends the value to Ty2 in the result register. This
    /// does not necessarily include registers defined in unknown ways, such
    /// as incoming arguments, or copies from unknown virtual registers. Also,
    /// if isTruncateFree(Ty2, Ty1) is true, this does not necessarily apply to
    /// truncate instructions. e.g. on x86-64, all instructions that define
    /// 32-bit values implicitly zero-extend the result out to 64 bits.
1343     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1344     bool isZExtFree(EVT VT1, EVT VT2) const override;
1345     bool isZExtFree(SDValue Val, EVT VT2) const override;
1346 
1347     bool shouldSinkOperands(Instruction *I,
1348                             SmallVectorImpl<Use *> &Ops) const override;
1349     bool shouldConvertPhiType(Type *From, Type *To) const override;
1350 
1351     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1352     /// extend node) is profitable.
1353     bool isVectorLoadExtDesirable(SDValue) const override;
1354 
1355     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1356     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1357     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1358     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1359                                     EVT VT) const override;
1360 
1361     /// Return true if it's profitable to narrow operations of type SrcVT to
1362     /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1363     /// from i32 to i16.
1364     bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
1365 
1366     bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1367                                               EVT VT) const override;
1368 
1369     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1370     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1371     /// true and stores the intrinsic information into the IntrinsicInfo that was
1372     /// passed to the function.
1373     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1374                             MachineFunction &MF,
1375                             unsigned Intrinsic) const override;
1376 
1377     /// Returns true if the target can instruction select the
1378     /// specified FP immediate natively. If false, the legalizer will
1379     /// materialize the FP immediate as a load from a constant pool.
1380     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1381                       bool ForCodeSize) const override;
1382 
1383     /// Targets can use this to indicate that they only support *some*
1384     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1385     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1386     /// be legal.
1387     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1388 
1389     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1390     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1391     /// constant pool entry.
1392     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1393 
1394     /// Returns true if lowering to a jump table is allowed.
1395     bool areJTsAllowed(const Function *Fn) const override;
1396 
1397     MVT getPreferredSwitchConditionType(LLVMContext &Context,
1398                                         EVT ConditionVT) const override;
1399 
1400     /// If true, then instruction selection should
1401     /// seek to shrink the FP constant of the specified type to a smaller type
1402     /// in order to save space and / or reduce runtime.
1403     bool ShouldShrinkFPConstant(EVT VT) const override;
1404 
1405     /// Return true if we believe it is correct and profitable to reduce the
1406     /// load node to a smaller type.
1407     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1408                                EVT NewVT) const override;
1409 
1410     /// Return true if the specified scalar FP type is computed in an SSE
1411     /// register, not on the X87 floating point stack.
1412     bool isScalarFPTypeInSSEReg(EVT VT) const;
1413 
1414     /// Returns true if it is beneficial to convert a load of a constant
1415     /// to just the constant itself.
1416     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1417                                            Type *Ty) const override;
1418 
1419     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1420 
1421     bool convertSelectOfConstantsToMath(EVT VT) const override;
1422 
1423     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1424                                 SDValue C) const override;
1425 
1426     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1427     /// with this index.
1428     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1429                                  unsigned Index) const override;
1430 
1431     /// Scalar ops always have equal or better analysis/performance/power than
1432     /// the vector equivalent, so this always makes sense if the scalar op is
1433     /// supported.
1434     bool shouldScalarizeBinop(SDValue) const override;
1435 
1436     /// Extract of a scalar FP value from index 0 of a vector is free.
1437     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1438       EVT EltVT = VT.getScalarType();
1439       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1440     }
1441 
1442     /// Overflow nodes should get combined/lowered to optimal instructions
1443     /// (they should allow eliminating explicit compares by getting flags from
1444     /// math ops).
1445     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1446                               bool MathUsed) const override;
1447 
1448     bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1449                                       unsigned AddrSpace) const override {
1450       // If we can replace more than 2 scalar stores, there will be a reduction
1451       // in instructions even after we add a vector constant load.
1452       return IsZero || NumElem > 2;
1453     }
1454 
1455     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1456                                  const SelectionDAG &DAG,
1457                                  const MachineMemOperand &MMO) const override;
1458 
    /// Intel processors have a unified instruction and data cache, so no
    /// clear-cache builtin is named here: this always returns nullptr.
    const char * getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }
1463 
1464     Register getRegisterByName(const char* RegName, LLT VT,
1465                                const MachineFunction &MF) const override;
1466 
1467     /// If a physical register, this returns the register that receives the
1468     /// exception address on entry to an EH pad.
1469     Register
1470     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1471 
1472     /// If a physical register, this returns the register that receives the
1473     /// exception typeid on entry to a landing pad.
1474     Register
1475     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1476 
1477     bool needsFixedCatchObjects() const override;
1478 
1479     /// This method returns a target specific FastISel object,
1480     /// or null if the target does not support "fast" ISel.
1481     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1482                              const TargetLibraryInfo *libInfo) const override;
1483 
1484     /// If the target has a standard location for the stack protector cookie,
1485     /// returns the address of that location. Otherwise, returns nullptr.
1486     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1487 
1488     bool useLoadStackGuardNode() const override;
1489     bool useStackGuardXorFP() const override;
1490     void insertSSPDeclarations(Module &M) const override;
1491     Value *getSDagStackGuard(const Module &M) const override;
1492     Function *getSSPStackGuardCheck(const Module &M) const override;
1493     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1494                                 const SDLoc &DL) const override;
1495 
1496 
    /// Returns the address at which the SafeStack pointer is stored, e.g. a
    /// fixed offset in some non-standard address space when the target uses
    /// one.
1500     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1501 
1502     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1503                                           SDValue Chain, SDValue Pointer,
1504                                           MachinePointerInfo PtrInfo,
1505                                           Align Alignment,
1506                                           SelectionDAG &DAG) const;
1507 
1508     /// Customize the preferred legalization strategy for certain types.
1509     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1510 
    /// Always returns true for X86.
    /// NOTE(review): presumably selects the "soft promote" legalization
    /// scheme for half-precision values - confirm against TargetLowering.
    bool softPromoteHalfType() const override { return true; }
1512 
1513     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1514                                       EVT VT) const override;
1515 
1516     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1517                                            CallingConv::ID CC,
1518                                            EVT VT) const override;
1519 
1520     unsigned getVectorTypeBreakdownForCallingConv(
1521         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1522         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1523 
1524     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1525 
1526     bool supportSwiftError() const override;
1527 
    /// Always returns true: X86 reports support for KCFI bundles.
    bool supportKCFIBundles() const override { return true; }
1529 
1530     MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1531                                 MachineBasicBlock::instr_iterator &MBBI,
1532                                 const TargetInstrInfo *TII) const override;
1533 
1534     bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1535     bool hasInlineStackProbe(const MachineFunction &MF) const override;
1536     StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1537 
1538     unsigned getStackProbeSize(const MachineFunction &MF) const;
1539 
    /// Always returns true: X86 reports that it has a vector blend.
    bool hasVectorBlend() const override { return true; }
1541 
    /// X86 reports a maximum supported interleave factor of 4.
    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1543 
1544     bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1545                                  unsigned OpNo) const override;
1546 
1547     /// Lower interleaved load(s) into target specific
1548     /// instructions/intrinsics.
1549     bool lowerInterleavedLoad(LoadInst *LI,
1550                               ArrayRef<ShuffleVectorInst *> Shuffles,
1551                               ArrayRef<unsigned> Indices,
1552                               unsigned Factor) const override;
1553 
1554     /// Lower interleaved store(s) into target specific
1555     /// instructions/intrinsics.
1556     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1557                                unsigned Factor) const override;
1558 
1559     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1560                                    SDValue Addr, SelectionDAG &DAG)
1561                                    const override;
1562 
1563     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1564 
1565     EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1566       if (VT == MVT::f80)
1567         return EVT::getIntegerVT(Context, 96);
1568       return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1569     }
1570 
1571   protected:
1572     std::pair<const TargetRegisterClass *, uint8_t>
1573     findRepresentativeClass(const TargetRegisterInfo *TRI,
1574                             MVT VT) const override;
1575 
1576   private:
1577     /// Keep a reference to the X86Subtarget around so that we can
1578     /// make the right decision when generating code for different targets.
1579     const X86Subtarget &Subtarget;
1580 
1581     /// A list of legal FP immediates.
1582     std::vector<APFloat> LegalFPImmediates;
1583 
1584     /// Indicate that this x86 target can instruction
1585     /// select the specified FP immediate natively.
1586     void addLegalFPImmediate(const APFloat& Imm) {
1587       LegalFPImmediates.push_back(Imm);
1588     }
1589 
1590     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1591                             CallingConv::ID CallConv, bool isVarArg,
1592                             const SmallVectorImpl<ISD::InputArg> &Ins,
1593                             const SDLoc &dl, SelectionDAG &DAG,
1594                             SmallVectorImpl<SDValue> &InVals,
1595                             uint32_t *RegMask) const;
1596     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1597                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1598                              const SDLoc &dl, SelectionDAG &DAG,
1599                              const CCValAssign &VA, MachineFrameInfo &MFI,
1600                              unsigned i) const;
1601     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1602                              const SDLoc &dl, SelectionDAG &DAG,
1603                              const CCValAssign &VA,
1604                              ISD::ArgFlagsTy Flags, bool isByval) const;
1605 
1606     // Call lowering helpers.
1607 
1608     /// Check whether the call is eligible for tail call optimization. Targets
1609     /// that want to do tail call optimization should implement this function.
1610     bool IsEligibleForTailCallOptimization(
1611         SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
1612         bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
1613         const SmallVectorImpl<SDValue> &OutVals,
1614         const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
1615     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1616                                     SDValue Chain, bool IsTailCall,
1617                                     bool Is64Bit, int FPDiff,
1618                                     const SDLoc &dl) const;
1619 
1620     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1621                                          SelectionDAG &DAG) const;
1622 
1623     unsigned getAddressSpace() const;
1624 
1625     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1626                             SDValue &Chain) const;
1627     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1628 
1629     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1630     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1631     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1632     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1633 
    /// Returns an unsigned "wrapper kind" for a global. Both parameters are
    /// optional: \p GV defaults to nullptr and \p OpFlags defaults to 0.
    /// NOTE(review): presumably the result is the X86ISD wrapper opcode to use
    /// when materializing the address -- confirm against the definition.
    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    // Lowering routines for the individual address-producing node kinds. Each
    // takes the value being lowered (Op) plus the DAG and returns an SDValue.
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    /// \p ForCall tells the routine whether the node is being created for a
    /// call (true) or for another use (false).
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;
1646 
    // Per-operation lowering routines. Each takes the value being lowered
    // (Op) plus the DAG and returns an SDValue; the routine name identifies the
    // operation it handles. Only the declarations are visible in this header.
    // Note that a few routines (the EH_SJLJ ones and lowerFaddFsub) start with
    // a lower-case "lower"; the spelling is part of the interface.
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    // Unlike its neighbours, this one also takes the chain by non-const
    // reference, so it can hand an updated chain back to the caller.
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1685 
    // Overrides of the base-class hooks for lowering incoming formal
    // arguments, outgoing calls and function returns.

    /// Lowers the incoming arguments described by \p Ins for a function with
    /// calling convention \p CallConv. \p InVals is taken by non-const
    /// reference and receives the resulting values.
    /// \returns an SDValue (NOTE(review): presumably the updated chain --
    /// confirm against the base-class documentation).
    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    /// Lowers the call described by \p CLI; \p InVals is taken by non-const
    /// reference and receives the resulting values.
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    /// Lowers a return of the values \p OutVals, described by \p Outs, for a
    /// function with calling convention \p CallConv.
    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;
1698 
1699     bool supportSplitCSR(MachineFunction *MF) const override {
1700       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1701           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1702     }
    // Overrides that accompany supportSplitCSR(). Both operate on machine
    // basic blocks: the entry block, and for insertCopiesSplitCSR also the
    // list of exit blocks.
    // NOTE(review): presumably these perform the explicit copies of the
    // callee-saved registers handled via split CSR -- confirm against the
    // definitions.
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1707 
    /// Override that splits \p Val into \p NumParts parts of type \p PartVT,
    /// written through the \p Parts pointer. \p CC is an optional calling
    /// convention.
    /// NOTE(review): the bool result presumably reports whether the target
    /// handled the split itself -- confirm against the base-class docs.
    bool splitValueIntoRegisterParts(
        SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
        const override;

    /// Counterpart of splitValueIntoRegisterParts(): combines the \p NumParts
    /// parts of type \p PartVT found at \p Parts into a value of type
    /// \p ValueVT. \p CC is an optional calling convention.
    SDValue joinRegisterPartsIntoValue(
        SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts,
        unsigned NumParts, MVT PartVT, EVT ValueVT,
        std::optional<CallingConv::ID> CC) const override;
1717 
    /// Override querying whether node \p N is used only by a return. \p Chain
    /// is taken by non-const reference, so it can be updated by the query.
    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    /// Override querying whether the call instruction \p CI may be emitted as
    /// a tail call.
    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    /// Override returning the type to use for an extended return value of type
    /// \p VT, given the kind of extension \p ExtendKind.
    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    /// Override querying whether the return values described by \p Outs can be
    /// lowered for calling convention \p CallConv in function \p MF.
    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    /// Override returning a pointer to the scratch registers for calling
    /// convention \p CC.
    /// NOTE(review): how the end of the pointed-to list is marked is not
    /// visible here -- confirm before iterating over the result.
    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    /// Override returning the list of rounding control registers.
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1732 
    // Atomic lowering hooks. The shouldExpand* queries each inspect one IR
    // instruction and answer with a TargetLoweringBase::AtomicExpansionKind.
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    // Not an override, unlike the queries above: a helper local to this class
    // for the logic-operation flavour of atomicrmw.
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    // Overrides that emit an intrinsic-based form of the atomicrmw \p AI.
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    /// Override that lowers an idempotent atomicrmw \p AI into a fenced load
    /// and returns the resulting LoadInst.
    /// NOTE(review): whether nullptr can be returned (meaning "not lowered")
    /// is not visible here -- confirm against the definition.
    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    // Overrides querying whether the atomic store / load instruction should be
    // lowered as a StoreSDNode / LoadSDNode.
    bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
    bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

    /// Query on the memory type \p MemType.
    /// NOTE(review): presumably true when an access of this type needs a
    /// cmpxchg8b/cmpxchg16b-style instruction -- confirm in the definition.
    bool needsCmpXchgNb(Type *MemType) const;
1751 
    /// SjLj helper operating on the instruction \p MI, its block \p MBB, the
    /// dispatch block \p DispatchBB and the frame index \p FI.
    /// NOTE(review): presumably records the dispatch block's address in the
    /// frame object \p FI from the entry block -- confirm in the definition.
    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    // Takes the instruction being expanded and its containing block, and
    // returns a machine basic block.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1758 
    /// Utility function to emit the lowering for a pair of select
    /// instructions, \p MI1 and \p MI2, located in \p BB. Returns a machine
    /// basic block.
    /// NOTE(review): the comment previously attached here described the xmm
    /// register save portion of va_start, which does not match this
    /// declaration. Judging by the name, \p MI2 is a select that consumes the
    /// result of \p MI1 ("cascaded") -- confirm against the definition.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;
1763 
    // Machine-level expansion helpers. Each receives the instruction being
    // expanded and the basic block that contains it, and (with the exception
    // of emitSetJmpShadowStackFix, which returns nothing) returns a machine
    // basic block.
    // NOTE(review): presumably the returned block is where instructions that
    // follow the expanded one should be inserted -- confirm against callers.
    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    // setjmp/longjmp (SjLj) expansion helpers, including the shadow-stack
    // fix-ups for each direction.
    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;
1799 
    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    /// \p Op0 and \p Op1 are the compared operands and \p CC the generic
    /// condition; \p X86CC is an output taken by non-const reference.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    /// \returns true when the replacement should be disabled for \p Op.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    /// \p RefinementSteps and \p UseOneConstNR are taken by non-const
    /// reference, so they can be updated by this hook.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    /// \p RefinementSteps is taken by non-const reference, so it can be
    /// updated by this hook.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    /// NOTE(review): the unsigned result is presumably a threshold on how many
    /// uses of a divisor are needed before the transform applies -- confirm
    /// against the base-class documentation.
    unsigned combineRepeatedFPDivisors() const override;

    /// Override for building a signed division of node \p N by \p Divisor.
    /// \p Created is taken by non-const reference.
    /// NOTE(review): presumably \p Divisor is a power of two (per the name)
    /// and \p Created collects the nodes built along the way -- confirm
    /// against the base-class documentation.
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;
1823   };
1824 
  namespace X86 {
    /// Factory for a FastISel object, built from the function lowering state
    /// \p funcInfo and the library information \p libInfo.
    /// NOTE(review): ownership of the returned pointer is not visible here --
    /// presumably it passes to the caller; confirm.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1829 
1830   // X86 specific Gather/Scatter nodes.
1831   // The class has the same order of operands as MaskedGatherScatterSDNode for
1832   // convenience.
1833   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1834   public:
1835     // This is a intended as a utility and should never be directly created.
1836     X86MaskedGatherScatterSDNode() = delete;
1837     ~X86MaskedGatherScatterSDNode() = delete;
1838 
1839     const SDValue &getBasePtr() const { return getOperand(3); }
1840     const SDValue &getIndex()   const { return getOperand(4); }
1841     const SDValue &getMask()    const { return getOperand(2); }
1842     const SDValue &getScale()   const { return getOperand(5); }
1843 
1844     static bool classof(const SDNode *N) {
1845       return N->getOpcode() == X86ISD::MGATHER ||
1846              N->getOpcode() == X86ISD::MSCATTER;
1847     }
1848   };
1849 
1850   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1851   public:
1852     const SDValue &getPassThru() const { return getOperand(1); }
1853 
1854     static bool classof(const SDNode *N) {
1855       return N->getOpcode() == X86ISD::MGATHER;
1856     }
1857   };
1858 
1859   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1860   public:
1861     const SDValue &getValue() const { return getOperand(1); }
1862 
1863     static bool classof(const SDNode *N) {
1864       return N->getOpcode() == X86ISD::MSCATTER;
1865     }
1866   };
1867 
  /// Generate unpacklo/unpackhi shuffle mask.
  /// The mask for vector type \p VT is delivered through \p Mask, which is
  /// taken by non-const reference (the function itself returns nothing).
  /// NOTE(review): presumably \p Lo picks unpacklo over unpackhi and \p Unary
  /// picks the single-input form -- confirm against the definition.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  /// \p Lo selects between the "Lo" and "Hi" patterns shown above; the mask is
  /// delivered through \p Mask, which is taken by non-const reference.
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1877 
1878 } // end namespace llvm
1879 
1880 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1881