1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
19 
20 namespace llvm {
21   class X86Subtarget;
22   class X86TargetMachine;
23 
24   namespace X86ISD {
25     // X86 Specific DAG Nodes
26   enum NodeType : unsigned {
27     // Start the numbering where the builtin ops leave off.
28     FIRST_NUMBER = ISD::BUILTIN_OP_END,
29 
30     /// Bit scan forward.
31     BSF,
32     /// Bit scan reverse.
33     BSR,
34 
35     /// X86 funnel/double shift i16 instructions. These correspond to
36     /// X86::SHLDW and X86::SHRDW instructions which have different amt
37     /// modulo rules to generic funnel shifts.
38     /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39     FSHL,
40     FSHR,
41 
42     /// Bitwise logical AND of floating point values. This corresponds
43     /// to X86::ANDPS or X86::ANDPD.
44     FAND,
45 
46     /// Bitwise logical OR of floating point values. This corresponds
47     /// to X86::ORPS or X86::ORPD.
48     FOR,
49 
50     /// Bitwise logical XOR of floating point values. This corresponds
51     /// to X86::XORPS or X86::XORPD.
52     FXOR,
53 
54     ///  Bitwise logical ANDNOT of floating point values. This
55     /// corresponds to X86::ANDNPS or X86::ANDNPD.
56     FANDN,
57 
58     /// These operations represent an abstract X86 call
59     /// instruction, which includes a bunch of information.  In particular the
60     /// operands of these node are:
61     ///
62     ///     #0 - The incoming token chain
63     ///     #1 - The callee
64     ///     #2 - The number of arg bytes the caller pushes on the stack.
65     ///     #3 - The number of arg bytes the callee pops off the stack.
66     ///     #4 - The value to pass in AL/AX/EAX (optional)
67     ///     #5 - The value to pass in DL/DX/EDX (optional)
68     ///
69     /// The result values of these nodes are:
70     ///
71     ///     #0 - The outgoing token chain
72     ///     #1 - The first register result value (optional)
73     ///     #2 - The second register result value (optional)
74     ///
75     CALL,
76 
77     /// Same as call except it adds the NoTrack prefix.
78     NT_CALL,
79 
80     // Pseudo for a OBJC call that gets emitted together with a special
81     // marker instruction.
82     CALL_RVMARKER,
83 
84     /// X86 compare and logical compare instructions.
85     CMP,
86     FCMP,
87     COMI,
88     UCOMI,
89 
90     /// X86 bit-test instructions.
91     BT,
92 
93     /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94     /// operand, usually produced by a CMP instruction.
95     SETCC,
96 
97     /// X86 Select
98     SELECTS,
99 
100     // Same as SETCC except it's materialized with a sbb and the value is all
101     // one's or all zero's.
102     SETCC_CARRY, // R = carry_bit ? ~0 : 0
103 
104     /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105     /// Operands are two FP values to compare; result is a mask of
106     /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
107     FSETCC,
108 
109     /// X86 FP SETCC, similar to above, but with output as an i1 mask and
110     /// and a version with SAE.
111     FSETCCM,
112     FSETCCM_SAE,
113 
114     /// X86 conditional moves. Operand 0 and operand 1 are the two values
115     /// to select from. Operand 2 is the condition code, and operand 3 is the
116     /// flag operand produced by a CMP or TEST instruction.
117     CMOV,
118 
119     /// X86 conditional branches. Operand 0 is the chain operand, operand 1
120     /// is the block to branch if condition is true, operand 2 is the
121     /// condition code, and operand 3 is the flag operand produced by a CMP
122     /// or TEST instruction.
123     BRCOND,
124 
125     /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126     /// operand 1 is the target address.
127     NT_BRIND,
128 
129     /// Return with a glue operand. Operand 0 is the chain operand, operand
130     /// 1 is the number of bytes of stack to pop.
131     RET_GLUE,
132 
133     /// Return from interrupt. Operand 0 is the number of bytes to pop.
134     IRET,
135 
136     /// Repeat fill, corresponds to X86::REP_STOSx.
137     REP_STOS,
138 
139     /// Repeat move, corresponds to X86::REP_MOVSx.
140     REP_MOVS,
141 
142     /// On Darwin, this node represents the result of the popl
143     /// at function entry, used for PIC code.
144     GlobalBaseReg,
145 
146     /// A wrapper node for TargetConstantPool, TargetJumpTable,
147     /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148     /// MCSymbol and TargetBlockAddress.
149     Wrapper,
150 
151     /// Special wrapper used under X86-64 PIC mode for RIP
152     /// relative displacements.
153     WrapperRIP,
154 
155     /// Copies a 64-bit value from an MMX vector to the low word
156     /// of an XMM vector, with the high word zero filled.
157     MOVQ2DQ,
158 
159     /// Copies a 64-bit value from the low word of an XMM vector
160     /// to an MMX vector.
161     MOVDQ2Q,
162 
163     /// Copies a 32-bit value from the low word of a MMX
164     /// vector to a GPR.
165     MMX_MOVD2W,
166 
167     /// Copies a GPR into the low 32-bit word of a MMX vector
168     /// and zero out the high word.
169     MMX_MOVW2D,
170 
171     /// Extract an 8-bit value from a vector and zero extend it to
172     /// i32, corresponds to X86::PEXTRB.
173     PEXTRB,
174 
175     /// Extract a 16-bit value from a vector and zero extend it to
176     /// i32, corresponds to X86::PEXTRW.
177     PEXTRW,
178 
179     /// Insert any element of a 4 x float vector into any element
180     /// of a destination 4 x floatvector.
181     INSERTPS,
182 
183     /// Insert the lower 8-bits of a 32-bit value to a vector,
184     /// corresponds to X86::PINSRB.
185     PINSRB,
186 
187     /// Insert the lower 16-bits of a 32-bit value to a vector,
188     /// corresponds to X86::PINSRW.
189     PINSRW,
190 
191     /// Shuffle 16 8-bit values within a vector.
192     PSHUFB,
193 
194     /// Compute Sum of Absolute Differences.
195     PSADBW,
196     /// Compute Double Block Packed Sum-Absolute-Differences
197     DBPSADBW,
198 
199     /// Bitwise Logical AND NOT of Packed FP values.
200     ANDNP,
201 
202     /// Blend where the selector is an immediate.
203     BLENDI,
204 
205     /// Dynamic (non-constant condition) vector blend where only the sign bits
206     /// of the condition elements are used. This is used to enforce that the
207     /// condition mask is not valid for generic VSELECT optimizations. This
208     /// is also used to implement the intrinsics.
209     /// Operands are in VSELECT order: MASK, TRUE, FALSE
210     BLENDV,
211 
212     /// Combined add and sub on an FP vector.
213     ADDSUB,
214 
215     //  FP vector ops with rounding mode.
216     FADD_RND,
217     FADDS,
218     FADDS_RND,
219     FSUB_RND,
220     FSUBS,
221     FSUBS_RND,
222     FMUL_RND,
223     FMULS,
224     FMULS_RND,
225     FDIV_RND,
226     FDIVS,
227     FDIVS_RND,
228     FMAX_SAE,
229     FMAXS_SAE,
230     FMIN_SAE,
231     FMINS_SAE,
232     FSQRT_RND,
233     FSQRTS,
234     FSQRTS_RND,
235 
236     // FP vector get exponent.
237     FGETEXP,
238     FGETEXP_SAE,
239     FGETEXPS,
240     FGETEXPS_SAE,
241     // Extract Normalized Mantissas.
242     VGETMANT,
243     VGETMANT_SAE,
244     VGETMANTS,
245     VGETMANTS_SAE,
246     // FP Scale.
247     SCALEF,
248     SCALEF_RND,
249     SCALEFS,
250     SCALEFS_RND,
251 
252     /// Integer horizontal add/sub.
253     HADD,
254     HSUB,
255 
256     /// Floating point horizontal add/sub.
257     FHADD,
258     FHSUB,
259 
260     // Detect Conflicts Within a Vector
261     CONFLICT,
262 
263     /// Floating point max and min.
264     FMAX,
265     FMIN,
266 
267     /// Commutative FMIN and FMAX.
268     FMAXC,
269     FMINC,
270 
271     /// Scalar intrinsic floating point max and min.
272     FMAXS,
273     FMINS,
274 
275     /// Floating point reciprocal-sqrt and reciprocal approximation.
276     /// Note that these typically require refinement
277     /// in order to obtain suitable precision.
278     FRSQRT,
279     FRCP,
280 
281     // AVX-512 reciprocal approximations with a little more precision.
282     RSQRT14,
283     RSQRT14S,
284     RCP14,
285     RCP14S,
286 
287     // Thread Local Storage.
288     TLSADDR,
289 
290     // Thread Local Storage. A call to get the start address
291     // of the TLS block for the current module.
292     TLSBASEADDR,
293 
294     // Thread Local Storage.  When calling to an OS provided
295     // thunk at the address from an earlier relocation.
296     TLSCALL,
297 
298     // Exception Handling helpers.
299     EH_RETURN,
300 
301     // SjLj exception handling setjmp.
302     EH_SJLJ_SETJMP,
303 
304     // SjLj exception handling longjmp.
305     EH_SJLJ_LONGJMP,
306 
307     // SjLj exception handling dispatch.
308     EH_SJLJ_SETUP_DISPATCH,
309 
310     /// Tail call return. See X86TargetLowering::LowerCall for
311     /// the list of operands.
312     TC_RETURN,
313 
314     // Vector move to low scalar and zero higher vector elements.
315     VZEXT_MOVL,
316 
317     // Vector integer truncate.
318     VTRUNC,
319     // Vector integer truncate with unsigned/signed saturation.
320     VTRUNCUS,
321     VTRUNCS,
322 
323     // Masked version of the above. Used when less than a 128-bit result is
324     // produced since the mask only applies to the lower elements and can't
325     // be represented by a select.
326     // SRC, PASSTHRU, MASK
327     VMTRUNC,
328     VMTRUNCUS,
329     VMTRUNCS,
330 
331     // Vector FP extend.
332     VFPEXT,
333     VFPEXT_SAE,
334     VFPEXTS,
335     VFPEXTS_SAE,
336 
337     // Vector FP round.
338     VFPROUND,
339     VFPROUND_RND,
340     VFPROUNDS,
341     VFPROUNDS_RND,
342 
343     // Masked version of above. Used for v2f64->v4f32.
344     // SRC, PASSTHRU, MASK
345     VMFPROUND,
346 
347     // 128-bit vector logical left / right shift
348     VSHLDQ,
349     VSRLDQ,
350 
351     // Vector shift elements
352     VSHL,
353     VSRL,
354     VSRA,
355 
356     // Vector variable shift
357     VSHLV,
358     VSRLV,
359     VSRAV,
360 
361     // Vector shift elements by immediate
362     VSHLI,
363     VSRLI,
364     VSRAI,
365 
366     // Shifts of mask registers.
367     KSHIFTL,
368     KSHIFTR,
369 
370     // Bit rotate by immediate
371     VROTLI,
372     VROTRI,
373 
374     // Vector packed double/float comparison.
375     CMPP,
376 
377     // Vector integer comparisons.
378     PCMPEQ,
379     PCMPGT,
380 
381     // v8i16 Horizontal minimum and position.
382     PHMINPOS,
383 
384     MULTISHIFT,
385 
386     /// Vector comparison generating mask bits for fp and
387     /// integer signed and unsigned data types.
388     CMPM,
389     // Vector mask comparison generating mask bits for FP values.
390     CMPMM,
391     // Vector mask comparison with SAE for FP values.
392     CMPMM_SAE,
393 
394     // Arithmetic operations with FLAGS results.
395     ADD,
396     SUB,
397     ADC,
398     SBB,
399     SMUL,
400     UMUL,
401     OR,
402     XOR,
403     AND,
404 
405     // Bit field extract.
406     BEXTR,
407     BEXTRI,
408 
409     // Zero High Bits Starting with Specified Bit Position.
410     BZHI,
411 
412     // Parallel extract and deposit.
413     PDEP,
414     PEXT,
415 
416     // X86-specific multiply by immediate.
417     MUL_IMM,
418 
419     // Vector sign bit extraction.
420     MOVMSK,
421 
422     // Vector bitwise comparisons.
423     PTEST,
424 
425     // Vector packed fp sign bitwise comparisons.
426     TESTP,
427 
428     // OR/AND test for masks.
429     KORTEST,
430     KTEST,
431 
432     // ADD for masks.
433     KADD,
434 
435     // Several flavors of instructions with vector shuffle behaviors.
436     // Saturated signed/unnsigned packing.
437     PACKSS,
438     PACKUS,
439     // Intra-lane alignr.
440     PALIGNR,
441     // AVX512 inter-lane alignr.
442     VALIGN,
443     PSHUFD,
444     PSHUFHW,
445     PSHUFLW,
446     SHUFP,
447     // VBMI2 Concat & Shift.
448     VSHLD,
449     VSHRD,
450     VSHLDV,
451     VSHRDV,
452     // Shuffle Packed Values at 128-bit granularity.
453     SHUF128,
454     MOVDDUP,
455     MOVSHDUP,
456     MOVSLDUP,
457     MOVLHPS,
458     MOVHLPS,
459     MOVSD,
460     MOVSS,
461     MOVSH,
462     UNPCKL,
463     UNPCKH,
464     VPERMILPV,
465     VPERMILPI,
466     VPERMI,
467     VPERM2X128,
468 
469     // Variable Permute (VPERM).
470     // Res = VPERMV MaskV, V0
471     VPERMV,
472 
473     // 3-op Variable Permute (VPERMT2).
474     // Res = VPERMV3 V0, MaskV, V1
475     VPERMV3,
476 
477     // Bitwise ternary logic.
478     VPTERNLOG,
479     // Fix Up Special Packed Float32/64 values.
480     VFIXUPIMM,
481     VFIXUPIMM_SAE,
482     VFIXUPIMMS,
483     VFIXUPIMMS_SAE,
484     // Range Restriction Calculation For Packed Pairs of Float32/64 values.
485     VRANGE,
486     VRANGE_SAE,
487     VRANGES,
488     VRANGES_SAE,
489     // Reduce - Perform Reduction Transformation on scalar\packed FP.
490     VREDUCE,
491     VREDUCE_SAE,
492     VREDUCES,
493     VREDUCES_SAE,
494     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
495     // Also used by the legacy (V)ROUND intrinsics where we mask out the
496     // scaling part of the immediate.
497     VRNDSCALE,
498     VRNDSCALE_SAE,
499     VRNDSCALES,
500     VRNDSCALES_SAE,
501     // Tests Types Of a FP Values for packed types.
502     VFPCLASS,
503     // Tests Types Of a FP Values for scalar types.
504     VFPCLASSS,
505 
506     // Broadcast (splat) scalar or element 0 of a vector. If the operand is
507     // a vector, this node may change the vector length as part of the splat.
508     VBROADCAST,
509     // Broadcast mask to vector.
510     VBROADCASTM,
511 
512     /// SSE4A Extraction and Insertion.
513     EXTRQI,
514     INSERTQI,
515 
516     // XOP arithmetic/logical shifts.
517     VPSHA,
518     VPSHL,
519     // XOP signed/unsigned integer comparisons.
520     VPCOM,
521     VPCOMU,
522     // XOP packed permute bytes.
523     VPPERM,
524     // XOP two source permutation.
525     VPERMIL2,
526 
527     // Vector multiply packed unsigned doubleword integers.
528     PMULUDQ,
529     // Vector multiply packed signed doubleword integers.
530     PMULDQ,
531     // Vector Multiply Packed UnsignedIntegers with Round and Scale.
532     MULHRS,
533 
534     // Multiply and Add Packed Integers.
535     VPMADDUBSW,
536     VPMADDWD,
537 
538     // AVX512IFMA multiply and add.
539     // NOTE: These are different than the instruction and perform
540     // op0 x op1 + op2.
541     VPMADD52L,
542     VPMADD52H,
543 
544     // VNNI
545     VPDPBUSD,
546     VPDPBUSDS,
547     VPDPWSSD,
548     VPDPWSSDS,
549 
550     // FMA nodes.
551     // We use the target independent ISD::FMA for the non-inverted case.
552     FNMADD,
553     FMSUB,
554     FNMSUB,
555     FMADDSUB,
556     FMSUBADD,
557 
558     // FMA with rounding mode.
559     FMADD_RND,
560     FNMADD_RND,
561     FMSUB_RND,
562     FNMSUB_RND,
563     FMADDSUB_RND,
564     FMSUBADD_RND,
565 
566     // AVX512-FP16 complex addition and multiplication.
567     VFMADDC,
568     VFMADDC_RND,
569     VFCMADDC,
570     VFCMADDC_RND,
571 
572     VFMULC,
573     VFMULC_RND,
574     VFCMULC,
575     VFCMULC_RND,
576 
577     VFMADDCSH,
578     VFMADDCSH_RND,
579     VFCMADDCSH,
580     VFCMADDCSH_RND,
581 
582     VFMULCSH,
583     VFMULCSH_RND,
584     VFCMULCSH,
585     VFCMULCSH_RND,
586 
587     VPDPBSUD,
588     VPDPBSUDS,
589     VPDPBUUD,
590     VPDPBUUDS,
591     VPDPBSSD,
592     VPDPBSSDS,
593 
594     // Compress and expand.
595     COMPRESS,
596     EXPAND,
597 
598     // Bits shuffle
599     VPSHUFBITQMB,
600 
601     // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
602     SINT_TO_FP_RND,
603     UINT_TO_FP_RND,
604     SCALAR_SINT_TO_FP,
605     SCALAR_UINT_TO_FP,
606     SCALAR_SINT_TO_FP_RND,
607     SCALAR_UINT_TO_FP_RND,
608 
609     // Vector float/double to signed/unsigned integer.
610     CVTP2SI,
611     CVTP2UI,
612     CVTP2SI_RND,
613     CVTP2UI_RND,
614     // Scalar float/double to signed/unsigned integer.
615     CVTS2SI,
616     CVTS2UI,
617     CVTS2SI_RND,
618     CVTS2UI_RND,
619 
620     // Vector float/double to signed/unsigned integer with truncation.
621     CVTTP2SI,
622     CVTTP2UI,
623     CVTTP2SI_SAE,
624     CVTTP2UI_SAE,
625     // Scalar float/double to signed/unsigned integer with truncation.
626     CVTTS2SI,
627     CVTTS2UI,
628     CVTTS2SI_SAE,
629     CVTTS2UI_SAE,
630 
631     // Vector signed/unsigned integer to float/double.
632     CVTSI2P,
633     CVTUI2P,
634 
635     // Masked versions of above. Used for v2f64->v4f32.
636     // SRC, PASSTHRU, MASK
637     MCVTP2SI,
638     MCVTP2UI,
639     MCVTTP2SI,
640     MCVTTP2UI,
641     MCVTSI2P,
642     MCVTUI2P,
643 
644     // Vector float to bfloat16.
645     // Convert TWO packed single data to one packed BF16 data
646     CVTNE2PS2BF16,
647     // Convert packed single data to packed BF16 data
648     CVTNEPS2BF16,
649     // Masked version of above.
650     // SRC, PASSTHRU, MASK
651     MCVTNEPS2BF16,
652 
653     // Dot product of BF16 pairs to accumulated into
654     // packed single precision.
655     DPBF16PS,
656 
657     // A stack checking function call. On Windows it's _chkstk call.
658     DYN_ALLOCA,
659 
660     // For allocating variable amounts of stack space when using
661     // segmented stacks. Check if the current stacklet has enough space, and
662     // falls back to heap allocation if not.
663     SEG_ALLOCA,
664 
665     // For allocating stack space when using stack clash protector.
666     // Allocation is performed by block, and each block is probed.
667     PROBED_ALLOCA,
668 
669     // Memory barriers.
670     MFENCE,
671 
672     // Get a random integer and indicate whether it is valid in CF.
673     RDRAND,
674 
675     // Get a NIST SP800-90B & C compliant random integer and
676     // indicate whether it is valid in CF.
677     RDSEED,
678 
679     // Protection keys
680     // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
681     // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
682     // value for ECX.
683     RDPKRU,
684     WRPKRU,
685 
686     // SSE42 string comparisons.
687     // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
688     // will emit one or two instructions based on which results are used. If
689     // flags and index/mask this allows us to use a single instruction since
690     // we won't have to pick and opcode for flags. Instead we can rely on the
691     // DAG to CSE everything and decide at isel.
692     PCMPISTR,
693     PCMPESTR,
694 
695     // Test if in transactional execution.
696     XTEST,
697 
698     // ERI instructions.
699     RSQRT28,
700     RSQRT28_SAE,
701     RSQRT28S,
702     RSQRT28S_SAE,
703     RCP28,
704     RCP28_SAE,
705     RCP28S,
706     RCP28S_SAE,
707     EXP2,
708     EXP2_SAE,
709 
710     // Conversions between float and half-float.
711     CVTPS2PH,
712     CVTPS2PH_SAE,
713     CVTPH2PS,
714     CVTPH2PS_SAE,
715 
716     // Masked version of above.
717     // SRC, RND, PASSTHRU, MASK
718     MCVTPS2PH,
719     MCVTPS2PH_SAE,
720 
721     // Galois Field Arithmetic Instructions
722     GF2P8AFFINEINVQB,
723     GF2P8AFFINEQB,
724     GF2P8MULB,
725 
726     // LWP insert record.
727     LWPINS,
728 
729     // User level wait
730     UMWAIT,
731     TPAUSE,
732 
733     // Enqueue Stores Instructions
734     ENQCMD,
735     ENQCMDS,
736 
737     // For avx512-vp2intersect
738     VP2INTERSECT,
739 
740     // User level interrupts - testui
741     TESTUI,
742 
743     // Perform an FP80 add after changing precision control in FPCW.
744     FP80_ADD,
745 
746     /// X86 strict FP compare instructions.
747     STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
748     STRICT_FCMPS,
749 
750     // Vector packed double/float comparison.
751     STRICT_CMPP,
752 
753     /// Vector comparison generating mask bits for fp and
754     /// integer signed and unsigned data types.
755     STRICT_CMPM,
756 
757     // Vector float/double to signed/unsigned integer with truncation.
758     STRICT_CVTTP2SI,
759     STRICT_CVTTP2UI,
760 
761     // Vector FP extend.
762     STRICT_VFPEXT,
763 
764     // Vector FP round.
765     STRICT_VFPROUND,
766 
767     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
768     // Also used by the legacy (V)ROUND intrinsics where we mask out the
769     // scaling part of the immediate.
770     STRICT_VRNDSCALE,
771 
772     // Vector signed/unsigned integer to float/double.
773     STRICT_CVTSI2P,
774     STRICT_CVTUI2P,
775 
776     // Strict FMA nodes.
777     STRICT_FNMADD,
778     STRICT_FMSUB,
779     STRICT_FNMSUB,
780 
781     // Conversions between float and half-float.
782     STRICT_CVTPS2PH,
783     STRICT_CVTPH2PS,
784 
785     // Perform an FP80 add after changing precision control in FPCW.
786     STRICT_FP80_ADD,
787 
788     // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
789     // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
790 
791     // Compare and swap.
792     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
793     LCMPXCHG8_DAG,
794     LCMPXCHG16_DAG,
795     LCMPXCHG16_SAVE_RBX_DAG,
796 
797     /// LOCK-prefixed arithmetic read-modify-write instructions.
798     /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
799     LADD,
800     LSUB,
801     LOR,
802     LXOR,
803     LAND,
804     LBTS,
805     LBTC,
806     LBTR,
807     LBTS_RM,
808     LBTC_RM,
809     LBTR_RM,
810 
811     /// RAO arithmetic instructions.
812     /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
813     AADD,
814     AOR,
815     AXOR,
816     AAND,
817 
818     // Load, scalar_to_vector, and zero extend.
819     VZEXT_LOAD,
820 
821     // extract_vector_elt, store.
822     VEXTRACT_STORE,
823 
824     // scalar broadcast from memory.
825     VBROADCAST_LOAD,
826 
827     // subvector broadcast from memory.
828     SUBV_BROADCAST_LOAD,
829 
830     // Store FP control word into i16 memory.
831     FNSTCW16m,
832 
833     // Load FP control word from i16 memory.
834     FLDCW16m,
835 
836     // Store x87 FPU environment into memory.
837     FNSTENVm,
838 
839     // Load x87 FPU environment from memory.
840     FLDENVm,
841 
842     /// This instruction implements FP_TO_SINT with the
843     /// integer destination in memory and a FP reg source.  This corresponds
844     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
845     /// has two inputs (token chain and address) and two outputs (int value
846     /// and token chain). Memory VT specifies the type to store to.
847     FP_TO_INT_IN_MEM,
848 
849     /// This instruction implements SINT_TO_FP with the
850     /// integer source in memory and FP reg result.  This corresponds to the
851     /// X86::FILD*m instructions. It has two inputs (token chain and address)
852     /// and two outputs (FP value and token chain). The integer source type is
853     /// specified by the memory VT.
854     FILD,
855 
856     /// This instruction implements a fp->int store from FP stack
857     /// slots. This corresponds to the fist instruction. It takes a
858     /// chain operand, value to store, address, and glue. The memory VT
859     /// specifies the type to store as.
860     FIST,
861 
862     /// This instruction implements an extending load to FP stack slots.
863     /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
864     /// operand, and ptr to load from. The memory VT specifies the type to
865     /// load from.
866     FLD,
867 
868     /// This instruction implements a truncating store from FP stack
869     /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
870     /// chain operand, value to store, address, and glue. The memory VT
871     /// specifies the type to store as.
872     FST,
873 
874     /// These instructions grab the address of the next argument
875     /// from a va_list. (reads and modifies the va_list in memory)
876     VAARG_64,
877     VAARG_X32,
878 
879     // Vector truncating store with unsigned/signed saturation
880     VTRUNCSTOREUS,
881     VTRUNCSTORES,
882     // Vector truncating masked store with unsigned/signed saturation
883     VMTRUNCSTOREUS,
884     VMTRUNCSTORES,
885 
886     // X86 specific gather and scatter
887     MGATHER,
888     MSCATTER,
889 
890     // Key locker nodes that produce flags.
891     AESENC128KL,
892     AESDEC128KL,
893     AESENC256KL,
894     AESDEC256KL,
895     AESENCWIDE128KL,
896     AESDECWIDE128KL,
897     AESENCWIDE256KL,
898     AESDECWIDE256KL,
899 
900     /// Compare and Add if Condition is Met. Compare value in operand 2 with
901     /// value in memory of operand 1. If condition of operand 4 is met, add
902     /// value operand 3 to m32 and write new value in operand 1. Operand 2 is
903     /// always updated with the original value from operand 1.
904     CMPCCXADD,
905 
906     // Save xmm argument registers to the stack, according to %al. An operator
907     // is needed so that this can be expanded with control flow.
908     VASTART_SAVE_XMM_REGS,
909 
910     // WARNING: Do not add anything in the end unless you want the node to
911     // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
912     // opcodes will be thought as target memory ops!
913   };
914   } // end namespace X86ISD
915 
916   namespace X86 {
917     /// Current rounding mode is represented in bits 11:10 of FPSR. These
918     /// values are same as corresponding constants for rounding mode used
919     /// in glibc.
920     enum RoundingMode {
921       rmToNearest   = 0,        // FE_TONEAREST
922       rmDownward    = 1 << 10,  // FE_DOWNWARD
923       rmUpward      = 2 << 10,  // FE_UPWARD
924       rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
925       rmMask        = 3 << 10   // Bit mask selecting rounding mode
926     };
927   }
928 
929   /// Define some predicates that are used for node matching.
930   namespace X86 {
931     /// Returns true if Elt is a constant zero or floating point constant +0.0.
932     bool isZeroNode(SDValue Elt);
933 
934     /// Returns true of the given offset can be
935     /// fit into displacement field of the instruction.
936     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
937                                       bool hasSymbolicDisplacement);
938 
939     /// Determines whether the callee is required to pop its
940     /// own arguments. Callee pop is necessary to support tail calls.
941     bool isCalleePop(CallingConv::ID CallingConv,
942                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
943 
944     /// If Op is a constant whose elements are all the same constant or
945     /// undefined, return true and return the constant value in \p SplatVal.
946     /// If we have undef bits that don't cover an entire element, we treat these
947     /// as zero if AllowPartialUndefs is set, else we fail and return false.
948     bool isConstantSplat(SDValue Op, APInt &SplatVal,
949                          bool AllowPartialUndefs = true);
950 
951     /// Check if Op is a load operation that could be folded into some other x86
952     /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
953     bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
954                      bool AssumeSingleUse = false);
955 
956     /// Check if Op is a load operation that could be folded into a vector splat
957     /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
958     bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
959                                          const X86Subtarget &Subtarget,
960                                          bool AssumeSingleUse = false);
961 
962     /// Check if Op is a value that could be used to fold a store into some
963     /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
964     bool mayFoldIntoStore(SDValue Op);
965 
966     /// Check if Op is an operation that could be folded into a zero extend x86
967     /// instruction.
968     bool mayFoldIntoZeroExtend(SDValue Op);
969   } // end namespace X86
970 
971   //===--------------------------------------------------------------------===//
972   //  X86 Implementation of the TargetLowering interface
973   class X86TargetLowering final : public TargetLowering {
974   public:
975     explicit X86TargetLowering(const X86TargetMachine &TM,
976                                const X86Subtarget &STI);
977 
978     unsigned getJumpTableEncoding() const override;
979     bool useSoftFloat() const override;
980 
981     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
982                                ArgListTy &Args) const override;
983 
getScalarShiftAmountTy(const DataLayout &,EVT VT)984     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
985       return MVT::i8;
986     }
987 
988     const MCExpr *
989     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
990                               const MachineBasicBlock *MBB, unsigned uid,
991                               MCContext &Ctx) const override;
992 
993     /// Returns relocation base for the given PIC jumptable.
994     SDValue getPICJumpTableRelocBase(SDValue Table,
995                                      SelectionDAG &DAG) const override;
996     const MCExpr *
997     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
998                                  unsigned JTI, MCContext &Ctx) const override;
999 
1000     /// Return the desired alignment for ByVal aggregate
1001     /// function arguments in the caller parameter area. For X86, aggregates
1002     /// that contains are placed at 16-byte boundaries while the rest are at
1003     /// 4-byte boundaries.
1004     uint64_t getByValTypeAlignment(Type *Ty,
1005                                    const DataLayout &DL) const override;
1006 
1007     EVT getOptimalMemOpType(const MemOp &Op,
1008                             const AttributeList &FuncAttributes) const override;
1009 
1010     /// Returns true if it's safe to use load / store of the
1011     /// specified type to expand memcpy / memset inline. This is mostly true
1012     /// for all types except for some special cases. For example, on X86
1013     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1014     /// also does type conversion. Note the specified type doesn't have to be
1015     /// legal as the hook is used before type legalization.
1016     bool isSafeMemOpType(MVT VT) const override;
1017 
1018     bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1019 
1020     /// Returns true if the target allows unaligned memory accesses of the
1021     /// specified type. Returns whether it is "fast" in the last argument.
1022     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1023                                         MachineMemOperand::Flags Flags,
1024                                         unsigned *Fast) const override;
1025 
1026     /// This function returns true if the memory access is aligned or if the
1027     /// target allows this specific unaligned memory access. If the access is
1028     /// allowed, the optional final parameter returns a relative speed of the
1029     /// access (as defined by the target).
1030     bool allowsMemoryAccess(
1031         LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1032         Align Alignment,
1033         MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1034         unsigned *Fast = nullptr) const override;
1035 
allowsMemoryAccess(LLVMContext & Context,const DataLayout & DL,EVT VT,const MachineMemOperand & MMO,unsigned * Fast)1036     bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1037                             const MachineMemOperand &MMO,
1038                             unsigned *Fast) const {
1039       return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1040                                 MMO.getAlign(), MMO.getFlags(), Fast);
1041     }
1042 
1043     /// Provide custom lowering hooks for some operations.
1044     ///
1045     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1046 
1047     /// Replace the results of node with an illegal result
1048     /// type with new values built out of custom code.
1049     ///
1050     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1051                             SelectionDAG &DAG) const override;
1052 
1053     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1054 
1055     bool preferABDSToABSWithNSW(EVT VT) const override;
1056 
1057     bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1058                                    EVT ExtVT) const override;
1059 
1060     bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1061                                            EVT VT) const override;
1062 
1063     /// Return true if the target has native support for
1064     /// the specified value type and it is 'desirable' to use the type for the
1065     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1066     /// instruction encodings are longer and some i16 instructions are slow.
1067     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1068 
1069     /// Return true if the target has native support for the
1070     /// specified value type and it is 'desirable' to use the type. e.g. On x86
1071     /// i16 is legal, but undesirable since i16 instruction encodings are longer
1072     /// and some i16 instructions are slow.
1073     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1074 
1075     /// Return prefered fold type, Abs if this is a vector, AddAnd if its an
1076     /// integer, None otherwise.
1077     TargetLowering::AndOrSETCCFoldKind
1078     isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1079                                        const SDNode *SETCC0,
1080                                        const SDNode *SETCC1) const override;
1081 
1082     /// Return the newly negated expression if the cost is not expensive and
1083     /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
1084     /// do the negation.
1085     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1086                                  bool LegalOperations, bool ForCodeSize,
1087                                  NegatibleCost &Cost,
1088                                  unsigned Depth) const override;
1089 
1090     MachineBasicBlock *
1091     EmitInstrWithCustomInserter(MachineInstr &MI,
1092                                 MachineBasicBlock *MBB) const override;
1093 
1094     /// This method returns the name of a target specific DAG node.
1095     const char *getTargetNodeName(unsigned Opcode) const override;
1096 
1097     /// Do not merge vector stores after legalization because that may conflict
1098     /// with x86-specific store splitting optimizations.
mergeStoresAfterLegalization(EVT MemVT)1099     bool mergeStoresAfterLegalization(EVT MemVT) const override {
1100       return !MemVT.isVector();
1101     }
1102 
1103     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1104                           const MachineFunction &MF) const override;
1105 
1106     bool isCheapToSpeculateCttz(Type *Ty) const override;
1107 
1108     bool isCheapToSpeculateCtlz(Type *Ty) const override;
1109 
1110     bool isCtlzFast() const override;
1111 
isMultiStoresCheaperThanBitsMerge(EVT LTy,EVT HTy)1112     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1113       // If the pair to store is a mixture of float and int values, we will
1114       // save two bitwise instructions and one float-to-int instruction and
1115       // increase one store instruction. There is potentially a more
1116       // significant benefit because it avoids the float->int domain switch
1117       // for input value. So It is more likely a win.
1118       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1119           (LTy.isInteger() && HTy.isFloatingPoint()))
1120         return true;
1121       // If the pair only contains int values, we will save two bitwise
1122       // instructions and increase one store instruction (costing one more
1123       // store buffer). Since the benefit is more blurred so we leave
1124       // such pair out until we get testcase to prove it is a win.
1125       return false;
1126     }
1127 
1128     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1129 
1130     bool hasAndNotCompare(SDValue Y) const override;
1131 
1132     bool hasAndNot(SDValue Y) const override;
1133 
1134     bool hasBitTest(SDValue X, SDValue Y) const override;
1135 
1136     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1137         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1138         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1139         SelectionDAG &DAG) const override;
1140 
1141     unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1142         EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1143         const APInt &ShiftOrRotateAmt,
1144         const std::optional<APInt> &AndMask) const override;
1145 
1146     bool preferScalarizeSplat(SDNode *N) const override;
1147 
1148     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1149                                            CombineLevel Level) const override;
1150 
1151     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1152 
1153     bool
shouldTransformSignedTruncationCheck(EVT XVT,unsigned KeptBits)1154     shouldTransformSignedTruncationCheck(EVT XVT,
1155                                          unsigned KeptBits) const override {
1156       // For vectors, we don't have a preference..
1157       if (XVT.isVector())
1158         return false;
1159 
1160       auto VTIsOk = [](EVT VT) -> bool {
1161         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1162                VT == MVT::i64;
1163       };
1164 
1165       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1166       // XVT will be larger than KeptBitsVT.
1167       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1168       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1169     }
1170 
1171     ShiftLegalizationStrategy
1172     preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1173                                        unsigned ExpansionFactor) const override;
1174 
1175     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1176 
shouldConvertFpToSat(unsigned Op,EVT FPVT,EVT VT)1177     bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1178       // Converting to sat variants holds little benefit on X86 as we will just
1179       // need to saturate the value back using fp arithmatic.
1180       return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1181     }
1182 
convertSetCCLogicToBitwiseLogic(EVT VT)1183     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1184       return VT.isScalarInteger();
1185     }
1186 
1187     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1188     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1189 
1190     /// Return the value type to use for ISD::SETCC.
1191     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1192                            EVT VT) const override;
1193 
1194     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1195                                       const APInt &DemandedElts,
1196                                       TargetLoweringOpt &TLO) const override;
1197 
1198     /// Determine which of the bits specified in Mask are known to be either
1199     /// zero or one and return them in the KnownZero/KnownOne bitsets.
1200     void computeKnownBitsForTargetNode(const SDValue Op,
1201                                        KnownBits &Known,
1202                                        const APInt &DemandedElts,
1203                                        const SelectionDAG &DAG,
1204                                        unsigned Depth = 0) const override;
1205 
1206     /// Determine the number of bits in the operation that are sign bits.
1207     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1208                                              const APInt &DemandedElts,
1209                                              const SelectionDAG &DAG,
1210                                              unsigned Depth) const override;
1211 
1212     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1213                                                  const APInt &DemandedElts,
1214                                                  APInt &KnownUndef,
1215                                                  APInt &KnownZero,
1216                                                  TargetLoweringOpt &TLO,
1217                                                  unsigned Depth) const override;
1218 
1219     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1220                                                     const APInt &DemandedElts,
1221                                                     unsigned MaskIndex,
1222                                                     TargetLoweringOpt &TLO,
1223                                                     unsigned Depth) const;
1224 
1225     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1226                                            const APInt &DemandedBits,
1227                                            const APInt &DemandedElts,
1228                                            KnownBits &Known,
1229                                            TargetLoweringOpt &TLO,
1230                                            unsigned Depth) const override;
1231 
1232     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1233         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1234         SelectionDAG &DAG, unsigned Depth) const override;
1235 
1236     bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1237         SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1238         bool PoisonOnly, unsigned Depth) const override;
1239 
1240     bool canCreateUndefOrPoisonForTargetNode(
1241         SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1242         bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1243 
1244     bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1245                                    APInt &UndefElts, const SelectionDAG &DAG,
1246                                    unsigned Depth) const override;
1247 
isTargetCanonicalConstantNode(SDValue Op)1248     bool isTargetCanonicalConstantNode(SDValue Op) const override {
1249       // Peek through bitcasts/extracts/inserts to see if we have a broadcast
1250       // vector from memory.
1251       while (Op.getOpcode() == ISD::BITCAST ||
1252              Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1253              (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1254               Op.getOperand(0).isUndef()))
1255         Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1256 
1257       return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1258              TargetLowering::isTargetCanonicalConstantNode(Op);
1259     }
1260 
1261     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1262 
1263     SDValue unwrapAddress(SDValue N) const override;
1264 
1265     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1266 
1267     bool ExpandInlineAsm(CallInst *CI) const override;
1268 
1269     ConstraintType getConstraintType(StringRef Constraint) const override;
1270 
1271     /// Examine constraint string and operand type and determine a weight value.
1272     /// The operand object must already have been set up with the operand type.
1273     ConstraintWeight
1274       getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1275                                      const char *Constraint) const override;
1276 
1277     const char *LowerXConstraint(EVT ConstraintVT) const override;
1278 
1279     /// Lower the specified operand into the Ops vector. If it is invalid, don't
1280     /// add anything to Ops. If hasMemory is true it means one of the asm
1281     /// constraint of the inline asm instruction being processed is 'm'.
1282     void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1283                                       std::vector<SDValue> &Ops,
1284                                       SelectionDAG &DAG) const override;
1285 
1286     InlineAsm::ConstraintCode
getInlineAsmMemConstraint(StringRef ConstraintCode)1287     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1288       if (ConstraintCode == "v")
1289         return InlineAsm::ConstraintCode::v;
1290       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1291     }
1292 
1293     /// Handle Lowering flag assembly outputs.
1294     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1295                                         const SDLoc &DL,
1296                                         const AsmOperandInfo &Constraint,
1297                                         SelectionDAG &DAG) const override;
1298 
1299     /// Given a physical register constraint
1300     /// (e.g. {edx}), return the register number and the register class for the
1301     /// register.  This should only be used for C_Register constraints.  On
1302     /// error, this returns a register number of 0.
1303     std::pair<unsigned, const TargetRegisterClass *>
1304     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1305                                  StringRef Constraint, MVT VT) const override;
1306 
1307     /// Return true if the addressing mode represented
1308     /// by AM is legal for this target, for a load/store of the specified type.
1309     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1310                                Type *Ty, unsigned AS,
1311                                Instruction *I = nullptr) const override;
1312 
1313     /// Return true if the specified immediate is legal
1314     /// icmp immediate, that is the target has icmp instructions which can
1315     /// compare a register against the immediate without having to materialize
1316     /// the immediate into a register.
1317     bool isLegalICmpImmediate(int64_t Imm) const override;
1318 
1319     /// Return true if the specified immediate is legal
1320     /// add immediate, that is the target has add instructions which can
1321     /// add a register and the immediate without having to materialize
1322     /// the immediate into a register.
1323     bool isLegalAddImmediate(int64_t Imm) const override;
1324 
1325     bool isLegalStoreImmediate(int64_t Imm) const override;
1326 
1327     /// This is used to enable splatted operand transforms for vector shifts
1328     /// and vector funnel shifts.
1329     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1330 
1331     /// Add x86-specific opcodes to the default list.
1332     bool isBinOp(unsigned Opcode) const override;
1333 
1334     /// Returns true if the opcode is a commutative binary operation.
1335     bool isCommutativeBinOp(unsigned Opcode) const override;
1336 
1337     /// Return true if it's free to truncate a value of
1338     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1339     /// register EAX to i16 by referencing its sub-register AX.
1340     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1341     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1342 
1343     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1344 
1345     /// Return true if any actual instruction that defines a
1346     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1347     /// register. This does not necessarily include registers defined in
1348     /// unknown ways, such as incoming arguments, or copies from unknown
1349     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1350     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1351     /// all instructions that define 32-bit values implicit zero-extend the
1352     /// result out to 64 bits.
1353     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1354     bool isZExtFree(EVT VT1, EVT VT2) const override;
1355     bool isZExtFree(SDValue Val, EVT VT2) const override;
1356 
1357     bool shouldSinkOperands(Instruction *I,
1358                             SmallVectorImpl<Use *> &Ops) const override;
1359     bool shouldConvertPhiType(Type *From, Type *To) const override;
1360 
1361     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1362     /// extend node) is profitable.
1363     bool isVectorLoadExtDesirable(SDValue) const override;
1364 
1365     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1366     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1367     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1368     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1369                                     EVT VT) const override;
1370 
1371     /// Return true if it's profitable to narrow operations of type SrcVT to
1372     /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1373     /// from i32 to i16.
1374     bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
1375 
1376     bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1377                                               EVT VT) const override;
1378 
1379     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1380     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1381     /// true and stores the intrinsic information into the IntrinsicInfo that was
1382     /// passed to the function.
1383     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1384                             MachineFunction &MF,
1385                             unsigned Intrinsic) const override;
1386 
1387     /// Returns true if the target can instruction select the
1388     /// specified FP immediate natively. If false, the legalizer will
1389     /// materialize the FP immediate as a load from a constant pool.
1390     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1391                       bool ForCodeSize) const override;
1392 
1393     /// Targets can use this to indicate that they only support *some*
1394     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1395     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1396     /// be legal.
1397     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1398 
1399     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1400     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1401     /// constant pool entry.
1402     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1403 
1404     /// Returns true if lowering to a jump table is allowed.
1405     bool areJTsAllowed(const Function *Fn) const override;
1406 
1407     MVT getPreferredSwitchConditionType(LLVMContext &Context,
1408                                         EVT ConditionVT) const override;
1409 
1410     /// If true, then instruction selection should
1411     /// seek to shrink the FP constant of the specified type to a smaller type
1412     /// in order to save space and / or reduce runtime.
1413     bool ShouldShrinkFPConstant(EVT VT) const override;
1414 
1415     /// Return true if we believe it is correct and profitable to reduce the
1416     /// load node to a smaller type.
1417     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1418                                EVT NewVT) const override;
1419 
1420     /// Return true if the specified scalar FP type is computed in an SSE
1421     /// register, not on the X87 floating point stack.
1422     bool isScalarFPTypeInSSEReg(EVT VT) const;
1423 
1424     /// Returns true if it is beneficial to convert a load of a constant
1425     /// to just the constant itself.
1426     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1427                                            Type *Ty) const override;
1428 
1429     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1430 
1431     bool convertSelectOfConstantsToMath(EVT VT) const override;
1432 
1433     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1434                                 SDValue C) const override;
1435 
1436     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1437     /// with this index.
1438     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1439                                  unsigned Index) const override;
1440 
1441     /// Scalar ops always have equal or better analysis/performance/power than
1442     /// the vector equivalent, so this always makes sense if the scalar op is
1443     /// supported.
shouldScalarizeBinop(SDValue)1444     bool shouldScalarizeBinop(SDValue) const override;
1445 
1446     /// Extract of a scalar FP value from index 0 of a vector is free.
1447     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1448       EVT EltVT = VT.getScalarType();
1449       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1450     }
1451 
1452     /// Overflow nodes should get combined/lowered to optimal instructions
1453     /// (they should allow eliminating explicit compares by getting flags from
1454     /// math ops).
1455     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1456                               bool MathUsed) const override;
1457 
storeOfVectorConstantIsCheap(bool IsZero,EVT MemVT,unsigned NumElem,unsigned AddrSpace)1458     bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1459                                       unsigned AddrSpace) const override {
1460       // If we can replace more than 2 scalar stores, there will be a reduction
1461       // in instructions even after we add a vector constant load.
1462       return IsZero || NumElem > 2;
1463     }
1464 
1465     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1466                                  const SelectionDAG &DAG,
1467                                  const MachineMemOperand &MMO) const override;
1468 
1469     /// Intel processors have a unified instruction and data cache
getClearCacheBuiltinName()1470     const char * getClearCacheBuiltinName() const override {
1471       return nullptr; // nothing to do, move along.
1472     }
1473 
1474     Register getRegisterByName(const char* RegName, LLT VT,
1475                                const MachineFunction &MF) const override;
1476 
1477     /// If a physical register, this returns the register that receives the
1478     /// exception address on entry to an EH pad.
1479     Register
1480     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1481 
1482     /// If a physical register, this returns the register that receives the
1483     /// exception typeid on entry to a landing pad.
1484     Register
1485     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1486 
1487     bool needsFixedCatchObjects() const override;
1488 
1489     /// This method returns a target specific FastISel object,
1490     /// or null if the target does not support "fast" ISel.
1491     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1492                              const TargetLibraryInfo *libInfo) const override;
1493 
1494     /// If the target has a standard location for the stack protector cookie,
1495     /// returns the address of that location. Otherwise, returns nullptr.
1496     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
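    // For illustration (the exact location is subtarget- and OS-dependent): on
    // typical x86-64 Linux targets the stack guard value lives in TLS at
    // %fs:0x28, and on 32-bit Linux at %gs:0x14; this hook can expose that
    // address directly at the IR level.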
1497 
1498     bool useLoadStackGuardNode() const override;
1499     bool useStackGuardXorFP() const override;
1500     void insertSSPDeclarations(Module &M) const override;
1501     Value *getSDagStackGuard(const Module &M) const override;
1502     Function *getSSPStackGuardCheck(const Module &M) const override;
1503     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1504                                 const SDLoc &DL) const override;
1505 
1506 
1507     /// Return the location at which the target stores the SafeStack pointer;
1508     /// on some x86 targets this is a fixed offset in a non-standard address
1509     /// space.
1510     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1511 
1512     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1513                                           SDValue Chain, SDValue Pointer,
1514                                           MachinePointerInfo PtrInfo,
1515                                           Align Alignment,
1516                                           SelectionDAG &DAG) const;
1517 
1518     /// Customize the preferred legalization strategy for certain types.
1519     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1520 
1521     bool softPromoteHalfType() const override { return true; }
1522 
1523     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1524                                       EVT VT) const override;
1525 
1526     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1527                                            CallingConv::ID CC,
1528                                            EVT VT) const override;
1529 
1530     unsigned getVectorTypeBreakdownForCallingConv(
1531         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1532         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1533 
1534     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1535 
1536     bool supportSwiftError() const override;
1537 
1538     bool supportKCFIBundles() const override { return true; }
1539 
1540     MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1541                                 MachineBasicBlock::instr_iterator &MBBI,
1542                                 const TargetInstrInfo *TII) const override;
1543 
1544     bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1545     bool hasInlineStackProbe(const MachineFunction &MF) const override;
1546     StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1547 
1548     unsigned getStackProbeSize(const MachineFunction &MF) const;
1549 
1550     bool hasVectorBlend() const override { return true; }
1551 
1552     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1553 
1554     bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1555                                  unsigned OpNo) const override;
1556 
1557     /// Lower interleaved load(s) into target specific
1558     /// instructions/intrinsics.
1559     bool lowerInterleavedLoad(LoadInst *LI,
1560                               ArrayRef<ShuffleVectorInst *> Shuffles,
1561                               ArrayRef<unsigned> Indices,
1562                               unsigned Factor) const override;
1563 
1564     /// Lower interleaved store(s) into target specific
1565     /// instructions/intrinsics.
1566     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1567                                unsigned Factor) const override;
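    // A minimal sketch of the factor-2 load pattern these hooks receive from the
    // InterleavedAccess pass (illustrative IR, not part of this header):
    //
    //   %wide = load <8 x i32>, ptr %p
    //   %even = shufflevector <8 x i32> %wide, <8 x i32> poison,
    //                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    //   %odd  = shufflevector <8 x i32> %wide, <8 x i32> poison,
    //                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    //
    // Here LI is %wide, Shuffles is {%even, %odd}, Indices is {0, 1}, and Factor
    // is 2; the x86 implementation may rewrite this into target shuffles.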
1568 
1569     SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1570                                    int JTI, SelectionDAG &DAG) const override;
1571 
1572     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1573 
1574     EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1575       if (VT == MVT::f80)
1576         return EVT::getIntegerVT(Context, 96);
1577       return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1578     }
1579 
1580   protected:
1581     std::pair<const TargetRegisterClass *, uint8_t>
1582     findRepresentativeClass(const TargetRegisterInfo *TRI,
1583                             MVT VT) const override;
1584 
1585   private:
1586     /// Keep a reference to the X86Subtarget around so that we can
1587     /// make the right decision when generating code for different targets.
1588     const X86Subtarget &Subtarget;
1589 
1590     /// A list of legal FP immediates.
1591     std::vector<APFloat> LegalFPImmediates;
1592 
1593     /// Indicate that this x86 target can instruction-select the specified
1594     /// FP immediate natively.
1595     void addLegalFPImmediate(const APFloat& Imm) {
1596       LegalFPImmediates.push_back(Imm);
1597     }
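    // Typical use, as a sketch (the real call sites are in X86ISelLowering.cpp's
    // constructor and depend on the enabled subtarget features):
    //
    //   addLegalFPImmediate(APFloat(+0.0f)); // +0.0 is free: XORPS zeroes a reg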
1598 
1599     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1600                             CallingConv::ID CallConv, bool isVarArg,
1601                             const SmallVectorImpl<ISD::InputArg> &Ins,
1602                             const SDLoc &dl, SelectionDAG &DAG,
1603                             SmallVectorImpl<SDValue> &InVals,
1604                             uint32_t *RegMask) const;
1605     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1606                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1607                              const SDLoc &dl, SelectionDAG &DAG,
1608                              const CCValAssign &VA, MachineFrameInfo &MFI,
1609                              unsigned i) const;
1610     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1611                              const SDLoc &dl, SelectionDAG &DAG,
1612                              const CCValAssign &VA,
1613                              ISD::ArgFlagsTy Flags, bool isByval) const;
1614 
1615     // Call lowering helpers.
1616 
1617     /// Check whether the call is eligible for tail call optimization. Targets
1618     /// that want to do tail call optimization should implement this function.
1619     bool IsEligibleForTailCallOptimization(
1620         SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
1621         bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
1622         const SmallVectorImpl<SDValue> &OutVals,
1623         const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
1624     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1625                                     SDValue Chain, bool IsTailCall,
1626                                     bool Is64Bit, int FPDiff,
1627                                     const SDLoc &dl) const;
1628 
1629     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1630                                          SelectionDAG &DAG) const;
1631 
1632     unsigned getAddressSpace() const;
1633 
1634     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1635                             SDValue &Chain) const;
1636     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1637 
1638     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1639     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1640     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1641     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1642 
1643     unsigned getGlobalWrapperKind(const GlobalValue *GV,
1644                                   const unsigned char OpFlags) const;
1645     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1646     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1647     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1648     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1649     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1650 
1651     /// Creates target global address or external symbol nodes for calls or
1652     /// other uses.
1653     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1654                                   bool ForCall) const;
1655 
1656     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1657     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1658     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1659     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1660     SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1661     SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1662     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1663     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1664     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1665     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1666     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1667     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1668     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1669     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1670     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1671     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1672     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1673     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1674     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1675     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1676     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1677     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1678     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1679     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1680     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1681     SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1682     SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1683     SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
1684     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1685     SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
1686                                     SDValue &Chain) const;
1687     SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1688     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1689     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1690     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1691     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1692     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1693     SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1694 
1695     SDValue
1696     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1697                          const SmallVectorImpl<ISD::InputArg> &Ins,
1698                          const SDLoc &dl, SelectionDAG &DAG,
1699                          SmallVectorImpl<SDValue> &InVals) const override;
1700     SDValue LowerCall(CallLoweringInfo &CLI,
1701                       SmallVectorImpl<SDValue> &InVals) const override;
1702 
1703     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1704                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1705                         const SmallVectorImpl<SDValue> &OutVals,
1706                         const SDLoc &dl, SelectionDAG &DAG) const override;
1707 
1708     bool supportSplitCSR(MachineFunction *MF) const override {
1709       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1710           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1711     }
1712     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1713     void insertCopiesSplitCSR(
1714       MachineBasicBlock *Entry,
1715       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1716 
1717     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1718 
1719     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1720 
1721     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1722                             ISD::NodeType ExtendKind) const override;
1723 
1724     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1725                         bool isVarArg,
1726                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1727                         LLVMContext &Context) const override;
1728 
1729     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1730     ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1731 
1732     TargetLoweringBase::AtomicExpansionKind
1733     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1734     TargetLoweringBase::AtomicExpansionKind
1735     shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1736     TargetLoweringBase::AtomicExpansionKind
1737     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1738     TargetLoweringBase::AtomicExpansionKind
1739     shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1740     void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1741     void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1742 
1743     LoadInst *
1744     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1745 
1746     bool needsCmpXchgNb(Type *MemType) const;
1747 
1748     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1749                                 MachineBasicBlock *DispatchBB, int FI) const;
1750 
1751     // Utility function to emit the low-level va_arg code for X86-64.
1752     MachineBasicBlock *
1753     EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1754 
1755     /// Utility function to lower a pair of cascaded select (CMOV) pseudos.
1756     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1757                                                  MachineInstr &MI2,
1758                                                  MachineBasicBlock *BB) const;
1759 
1760     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1761                                          MachineBasicBlock *BB) const;
1762 
1763     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1764                                            MachineBasicBlock *BB) const;
1765 
1766     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1767                                             MachineBasicBlock *BB) const;
1768 
1769     MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1770                                                MachineBasicBlock *BB) const;
1771 
1772     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1773                                           MachineBasicBlock *BB) const;
1774 
1775     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1776                                           MachineBasicBlock *BB) const;
1777 
1778     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1779                                                 MachineBasicBlock *BB) const;
1780 
1781     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1782                                         MachineBasicBlock *MBB) const;
1783 
1784     void emitSetJmpShadowStackFix(MachineInstr &MI,
1785                                   MachineBasicBlock *MBB) const;
1786 
1787     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1788                                          MachineBasicBlock *MBB) const;
1789 
1790     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1791                                                  MachineBasicBlock *MBB) const;
1792 
1793     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1794                                              MachineBasicBlock *MBB) const;
1795 
1796     /// Emit flags for the given setcc condition and operands. Also returns the
1797     /// corresponding X86 condition code constant in X86CC.
1798     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1799                               const SDLoc &dl, SelectionDAG &DAG,
1800                               SDValue &X86CC) const;
1801 
1802     bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
1803                                              SDValue IntPow2) const override;
1804 
1805     /// Check if replacement of SQRT with RSQRT should be disabled.
1806     bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1807 
1808     /// Use rsqrt* to speed up sqrt calculations.
1809     SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1810                             int &RefinementSteps, bool &UseOneConstNR,
1811                             bool Reciprocal) const override;
1812 
1813     /// Use rcp* to speed up fdiv calculations.
1814     SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1815                              int &RefinementSteps) const override;
1816 
1817     /// Reassociate floating point divisions into multiply by reciprocal.
1818     unsigned combineRepeatedFPDivisors() const override;
1819 
1820     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1821                           SmallVectorImpl<SDNode *> &Created) const override;
1822 
1823     SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
1824                     SDValue V2) const;
1825   };
1826 
1827   namespace X86 {
1828     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1829                              const TargetLibraryInfo *libInfo);
1830   } // end namespace X86
1831 
1832   // X86 specific Gather/Scatter nodes.
1833   // The class has the same order of operands as MaskedGatherScatterSDNode for
1834   // convenience.
1835   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1836   public:
1837     // This is intended as a utility and should never be directly created.
1838     X86MaskedGatherScatterSDNode() = delete;
1839     ~X86MaskedGatherScatterSDNode() = delete;
1840 
1841     const SDValue &getBasePtr() const { return getOperand(3); }
1842     const SDValue &getIndex()   const { return getOperand(4); }
1843     const SDValue &getMask()    const { return getOperand(2); }
1844     const SDValue &getScale()   const { return getOperand(5); }
1845 
1846     static bool classof(const SDNode *N) {
1847       return N->getOpcode() == X86ISD::MGATHER ||
1848              N->getOpcode() == X86ISD::MSCATTER;
1849     }
1850   };
1851 
1852   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1853   public:
1854     const SDValue &getPassThru() const { return getOperand(1); }
1855 
1856     static bool classof(const SDNode *N) {
1857       return N->getOpcode() == X86ISD::MGATHER;
1858     }
1859   };
1860 
1861   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1862   public:
1863     const SDValue &getValue() const { return getOperand(1); }
1864 
1865     static bool classof(const SDNode *N) {
1866       return N->getOpcode() == X86ISD::MSCATTER;
1867     }
1868   };
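  // Illustrative use of the wrappers above in DAG code (a sketch, not part of
  // this interface); classof() lets the usual LLVM casting utilities dispatch
  // on the X86ISD opcode:
  //
  //   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Index = Gather->getIndex(); // operand #4
  //     SDValue Mask  = Gather->getMask();  // operand #2
  //     // ...
  //   }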
1869 
1870   /// Generate unpacklo/unpackhi shuffle mask.
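  /// For example, for v4i32 the binary masks are <0, 4, 1, 5> (Lo) and
  /// <2, 6, 3, 7> (Hi); with Unary set they become <0, 0, 1, 1> and
  /// <2, 2, 3, 3>.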
1871   void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1872                                bool Unary);
1873 
1874   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1875   /// imposed by AVX and specific to the unary pattern. Example:
1876   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1877   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1878   void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1879 
1880 } // end namespace llvm
1881 
1882 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1883