1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
19 
20 namespace llvm {
21   class X86Subtarget;
22   class X86TargetMachine;
23 
24   namespace X86ISD {
25     // X86 Specific DAG Nodes
26   enum NodeType : unsigned {
27     // Start the numbering where the builtin ops leave off.
28     FIRST_NUMBER = ISD::BUILTIN_OP_END,
29 
30     /// Bit scan forward.
31     BSF,
32     /// Bit scan reverse.
33     BSR,
34 
35     /// X86 funnel/double shift i16 instructions. These correspond to
36     /// X86::SHLDW and X86::SHRDW instructions which have different amt
37     /// modulo rules to generic funnel shifts.
38     /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39     FSHL,
40     FSHR,
41 
42     /// Bitwise logical AND of floating point values. This corresponds
43     /// to X86::ANDPS or X86::ANDPD.
44     FAND,
45 
46     /// Bitwise logical OR of floating point values. This corresponds
47     /// to X86::ORPS or X86::ORPD.
48     FOR,
49 
50     /// Bitwise logical XOR of floating point values. This corresponds
51     /// to X86::XORPS or X86::XORPD.
52     FXOR,
53 
54     ///  Bitwise logical ANDNOT of floating point values. This
55     /// corresponds to X86::ANDNPS or X86::ANDNPD.
56     FANDN,
57 
58     /// These operations represent an abstract X86 call
59     /// instruction, which includes a bunch of information.  In particular the
60     /// operands of these node are:
61     ///
62     ///     #0 - The incoming token chain
63     ///     #1 - The callee
64     ///     #2 - The number of arg bytes the caller pushes on the stack.
65     ///     #3 - The number of arg bytes the callee pops off the stack.
66     ///     #4 - The value to pass in AL/AX/EAX (optional)
67     ///     #5 - The value to pass in DL/DX/EDX (optional)
68     ///
69     /// The result values of these nodes are:
70     ///
71     ///     #0 - The outgoing token chain
72     ///     #1 - The first register result value (optional)
73     ///     #2 - The second register result value (optional)
74     ///
75     CALL,
76 
77     /// Same as call except it adds the NoTrack prefix.
78     NT_CALL,
79 
80     // Pseudo for a OBJC call that gets emitted together with a special
81     // marker instruction.
82     CALL_RVMARKER,
83 
84     /// X86 compare and logical compare instructions.
85     CMP,
86     FCMP,
87     COMI,
88     UCOMI,
89 
90     /// X86 bit-test instructions.
91     BT,
92 
93     /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94     /// operand, usually produced by a CMP instruction.
95     SETCC,
96 
97     /// X86 Select
98     SELECTS,
99 
100     // Same as SETCC except it's materialized with a sbb and the value is all
101     // one's or all zero's.
102     SETCC_CARRY, // R = carry_bit ? ~0 : 0
103 
104     /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105     /// Operands are two FP values to compare; result is a mask of
106     /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
107     FSETCC,
108 
109     /// X86 FP SETCC, similar to above, but with output as an i1 mask and
110     /// and a version with SAE.
111     FSETCCM,
112     FSETCCM_SAE,
113 
114     /// X86 conditional moves. Operand 0 and operand 1 are the two values
115     /// to select from. Operand 2 is the condition code, and operand 3 is the
116     /// flag operand produced by a CMP or TEST instruction.
117     CMOV,
118 
119     /// X86 conditional branches. Operand 0 is the chain operand, operand 1
120     /// is the block to branch if condition is true, operand 2 is the
121     /// condition code, and operand 3 is the flag operand produced by a CMP
122     /// or TEST instruction.
123     BRCOND,
124 
125     /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126     /// operand 1 is the target address.
127     NT_BRIND,
128 
129     /// Return with a flag operand. Operand 0 is the chain operand, operand
130     /// 1 is the number of bytes of stack to pop.
131     RET_FLAG,
132 
133     /// Return from interrupt. Operand 0 is the number of bytes to pop.
134     IRET,
135 
136     /// Repeat fill, corresponds to X86::REP_STOSx.
137     REP_STOS,
138 
139     /// Repeat move, corresponds to X86::REP_MOVSx.
140     REP_MOVS,
141 
142     /// On Darwin, this node represents the result of the popl
143     /// at function entry, used for PIC code.
144     GlobalBaseReg,
145 
146     /// A wrapper node for TargetConstantPool, TargetJumpTable,
147     /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148     /// MCSymbol and TargetBlockAddress.
149     Wrapper,
150 
151     /// Special wrapper used under X86-64 PIC mode for RIP
152     /// relative displacements.
153     WrapperRIP,
154 
155     /// Copies a 64-bit value from an MMX vector to the low word
156     /// of an XMM vector, with the high word zero filled.
157     MOVQ2DQ,
158 
159     /// Copies a 64-bit value from the low word of an XMM vector
160     /// to an MMX vector.
161     MOVDQ2Q,
162 
163     /// Copies a 32-bit value from the low word of a MMX
164     /// vector to a GPR.
165     MMX_MOVD2W,
166 
167     /// Copies a GPR into the low 32-bit word of a MMX vector
168     /// and zero out the high word.
169     MMX_MOVW2D,
170 
171     /// Extract an 8-bit value from a vector and zero extend it to
172     /// i32, corresponds to X86::PEXTRB.
173     PEXTRB,
174 
175     /// Extract a 16-bit value from a vector and zero extend it to
176     /// i32, corresponds to X86::PEXTRW.
177     PEXTRW,
178 
179     /// Insert any element of a 4 x float vector into any element
180     /// of a destination 4 x floatvector.
181     INSERTPS,
182 
183     /// Insert the lower 8-bits of a 32-bit value to a vector,
184     /// corresponds to X86::PINSRB.
185     PINSRB,
186 
187     /// Insert the lower 16-bits of a 32-bit value to a vector,
188     /// corresponds to X86::PINSRW.
189     PINSRW,
190 
191     /// Shuffle 16 8-bit values within a vector.
192     PSHUFB,
193 
194     /// Compute Sum of Absolute Differences.
195     PSADBW,
196     /// Compute Double Block Packed Sum-Absolute-Differences
197     DBPSADBW,
198 
199     /// Bitwise Logical AND NOT of Packed FP values.
200     ANDNP,
201 
202     /// Blend where the selector is an immediate.
203     BLENDI,
204 
205     /// Dynamic (non-constant condition) vector blend where only the sign bits
206     /// of the condition elements are used. This is used to enforce that the
207     /// condition mask is not valid for generic VSELECT optimizations. This
208     /// is also used to implement the intrinsics.
209     /// Operands are in VSELECT order: MASK, TRUE, FALSE
210     BLENDV,
211 
212     /// Combined add and sub on an FP vector.
213     ADDSUB,
214 
215     //  FP vector ops with rounding mode.
216     FADD_RND,
217     FADDS,
218     FADDS_RND,
219     FSUB_RND,
220     FSUBS,
221     FSUBS_RND,
222     FMUL_RND,
223     FMULS,
224     FMULS_RND,
225     FDIV_RND,
226     FDIVS,
227     FDIVS_RND,
228     FMAX_SAE,
229     FMAXS_SAE,
230     FMIN_SAE,
231     FMINS_SAE,
232     FSQRT_RND,
233     FSQRTS,
234     FSQRTS_RND,
235 
236     // FP vector get exponent.
237     FGETEXP,
238     FGETEXP_SAE,
239     FGETEXPS,
240     FGETEXPS_SAE,
241     // Extract Normalized Mantissas.
242     VGETMANT,
243     VGETMANT_SAE,
244     VGETMANTS,
245     VGETMANTS_SAE,
246     // FP Scale.
247     SCALEF,
248     SCALEF_RND,
249     SCALEFS,
250     SCALEFS_RND,
251 
252     /// Integer horizontal add/sub.
253     HADD,
254     HSUB,
255 
256     /// Floating point horizontal add/sub.
257     FHADD,
258     FHSUB,
259 
260     // Detect Conflicts Within a Vector
261     CONFLICT,
262 
263     /// Floating point max and min.
264     FMAX,
265     FMIN,
266 
267     /// Commutative FMIN and FMAX.
268     FMAXC,
269     FMINC,
270 
271     /// Scalar intrinsic floating point max and min.
272     FMAXS,
273     FMINS,
274 
275     /// Floating point reciprocal-sqrt and reciprocal approximation.
276     /// Note that these typically require refinement
277     /// in order to obtain suitable precision.
278     FRSQRT,
279     FRCP,
280 
281     // AVX-512 reciprocal approximations with a little more precision.
282     RSQRT14,
283     RSQRT14S,
284     RCP14,
285     RCP14S,
286 
287     // Thread Local Storage.
288     TLSADDR,
289 
290     // Thread Local Storage. A call to get the start address
291     // of the TLS block for the current module.
292     TLSBASEADDR,
293 
294     // Thread Local Storage.  When calling to an OS provided
295     // thunk at the address from an earlier relocation.
296     TLSCALL,
297 
298     // Exception Handling helpers.
299     EH_RETURN,
300 
301     // SjLj exception handling setjmp.
302     EH_SJLJ_SETJMP,
303 
304     // SjLj exception handling longjmp.
305     EH_SJLJ_LONGJMP,
306 
307     // SjLj exception handling dispatch.
308     EH_SJLJ_SETUP_DISPATCH,
309 
310     /// Tail call return. See X86TargetLowering::LowerCall for
311     /// the list of operands.
312     TC_RETURN,
313 
314     // Vector move to low scalar and zero higher vector elements.
315     VZEXT_MOVL,
316 
317     // Vector integer truncate.
318     VTRUNC,
319     // Vector integer truncate with unsigned/signed saturation.
320     VTRUNCUS,
321     VTRUNCS,
322 
323     // Masked version of the above. Used when less than a 128-bit result is
324     // produced since the mask only applies to the lower elements and can't
325     // be represented by a select.
326     // SRC, PASSTHRU, MASK
327     VMTRUNC,
328     VMTRUNCUS,
329     VMTRUNCS,
330 
331     // Vector FP extend.
332     VFPEXT,
333     VFPEXT_SAE,
334     VFPEXTS,
335     VFPEXTS_SAE,
336 
337     // Vector FP round.
338     VFPROUND,
339     VFPROUND_RND,
340     VFPROUNDS,
341     VFPROUNDS_RND,
342 
343     // Masked version of above. Used for v2f64->v4f32.
344     // SRC, PASSTHRU, MASK
345     VMFPROUND,
346 
347     // 128-bit vector logical left / right shift
348     VSHLDQ,
349     VSRLDQ,
350 
351     // Vector shift elements
352     VSHL,
353     VSRL,
354     VSRA,
355 
356     // Vector variable shift
357     VSHLV,
358     VSRLV,
359     VSRAV,
360 
361     // Vector shift elements by immediate
362     VSHLI,
363     VSRLI,
364     VSRAI,
365 
366     // Shifts of mask registers.
367     KSHIFTL,
368     KSHIFTR,
369 
370     // Bit rotate by immediate
371     VROTLI,
372     VROTRI,
373 
374     // Vector packed double/float comparison.
375     CMPP,
376 
377     // Vector integer comparisons.
378     PCMPEQ,
379     PCMPGT,
380 
381     // v8i16 Horizontal minimum and position.
382     PHMINPOS,
383 
384     MULTISHIFT,
385 
386     /// Vector comparison generating mask bits for fp and
387     /// integer signed and unsigned data types.
388     CMPM,
389     // Vector mask comparison generating mask bits for FP values.
390     CMPMM,
391     // Vector mask comparison with SAE for FP values.
392     CMPMM_SAE,
393 
394     // Arithmetic operations with FLAGS results.
395     ADD,
396     SUB,
397     ADC,
398     SBB,
399     SMUL,
400     UMUL,
401     OR,
402     XOR,
403     AND,
404 
405     // Bit field extract.
406     BEXTR,
407     BEXTRI,
408 
409     // Zero High Bits Starting with Specified Bit Position.
410     BZHI,
411 
412     // Parallel extract and deposit.
413     PDEP,
414     PEXT,
415 
416     // X86-specific multiply by immediate.
417     MUL_IMM,
418 
419     // Vector sign bit extraction.
420     MOVMSK,
421 
422     // Vector bitwise comparisons.
423     PTEST,
424 
425     // Vector packed fp sign bitwise comparisons.
426     TESTP,
427 
428     // OR/AND test for masks.
429     KORTEST,
430     KTEST,
431 
432     // ADD for masks.
433     KADD,
434 
435     // Several flavors of instructions with vector shuffle behaviors.
436     // Saturated signed/unnsigned packing.
437     PACKSS,
438     PACKUS,
439     // Intra-lane alignr.
440     PALIGNR,
441     // AVX512 inter-lane alignr.
442     VALIGN,
443     PSHUFD,
444     PSHUFHW,
445     PSHUFLW,
446     SHUFP,
447     // VBMI2 Concat & Shift.
448     VSHLD,
449     VSHRD,
450     VSHLDV,
451     VSHRDV,
452     // Shuffle Packed Values at 128-bit granularity.
453     SHUF128,
454     MOVDDUP,
455     MOVSHDUP,
456     MOVSLDUP,
457     MOVLHPS,
458     MOVHLPS,
459     MOVSD,
460     MOVSS,
461     MOVSH,
462     UNPCKL,
463     UNPCKH,
464     VPERMILPV,
465     VPERMILPI,
466     VPERMI,
467     VPERM2X128,
468 
469     // Variable Permute (VPERM).
470     // Res = VPERMV MaskV, V0
471     VPERMV,
472 
473     // 3-op Variable Permute (VPERMT2).
474     // Res = VPERMV3 V0, MaskV, V1
475     VPERMV3,
476 
477     // Bitwise ternary logic.
478     VPTERNLOG,
479     // Fix Up Special Packed Float32/64 values.
480     VFIXUPIMM,
481     VFIXUPIMM_SAE,
482     VFIXUPIMMS,
483     VFIXUPIMMS_SAE,
484     // Range Restriction Calculation For Packed Pairs of Float32/64 values.
485     VRANGE,
486     VRANGE_SAE,
487     VRANGES,
488     VRANGES_SAE,
489     // Reduce - Perform Reduction Transformation on scalar\packed FP.
490     VREDUCE,
491     VREDUCE_SAE,
492     VREDUCES,
493     VREDUCES_SAE,
494     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
495     // Also used by the legacy (V)ROUND intrinsics where we mask out the
496     // scaling part of the immediate.
497     VRNDSCALE,
498     VRNDSCALE_SAE,
499     VRNDSCALES,
500     VRNDSCALES_SAE,
501     // Tests Types Of a FP Values for packed types.
502     VFPCLASS,
503     // Tests Types Of a FP Values for scalar types.
504     VFPCLASSS,
505 
506     // Broadcast (splat) scalar or element 0 of a vector. If the operand is
507     // a vector, this node may change the vector length as part of the splat.
508     VBROADCAST,
509     // Broadcast mask to vector.
510     VBROADCASTM,
511 
512     /// SSE4A Extraction and Insertion.
513     EXTRQI,
514     INSERTQI,
515 
516     // XOP arithmetic/logical shifts.
517     VPSHA,
518     VPSHL,
519     // XOP signed/unsigned integer comparisons.
520     VPCOM,
521     VPCOMU,
522     // XOP packed permute bytes.
523     VPPERM,
524     // XOP two source permutation.
525     VPERMIL2,
526 
527     // Vector multiply packed unsigned doubleword integers.
528     PMULUDQ,
529     // Vector multiply packed signed doubleword integers.
530     PMULDQ,
531     // Vector Multiply Packed UnsignedIntegers with Round and Scale.
532     MULHRS,
533 
534     // Multiply and Add Packed Integers.
535     VPMADDUBSW,
536     VPMADDWD,
537 
538     // AVX512IFMA multiply and add.
539     // NOTE: These are different than the instruction and perform
540     // op0 x op1 + op2.
541     VPMADD52L,
542     VPMADD52H,
543 
544     // VNNI
545     VPDPBUSD,
546     VPDPBUSDS,
547     VPDPWSSD,
548     VPDPWSSDS,
549 
550     // FMA nodes.
551     // We use the target independent ISD::FMA for the non-inverted case.
552     FNMADD,
553     FMSUB,
554     FNMSUB,
555     FMADDSUB,
556     FMSUBADD,
557 
558     // FMA with rounding mode.
559     FMADD_RND,
560     FNMADD_RND,
561     FMSUB_RND,
562     FNMSUB_RND,
563     FMADDSUB_RND,
564     FMSUBADD_RND,
565 
566     // AVX512-FP16 complex addition and multiplication.
567     VFMADDC,
568     VFMADDC_RND,
569     VFCMADDC,
570     VFCMADDC_RND,
571 
572     VFMULC,
573     VFMULC_RND,
574     VFCMULC,
575     VFCMULC_RND,
576 
577     VFMADDCSH,
578     VFMADDCSH_RND,
579     VFCMADDCSH,
580     VFCMADDCSH_RND,
581 
582     VFMULCSH,
583     VFMULCSH_RND,
584     VFCMULCSH,
585     VFCMULCSH_RND,
586 
587     VPDPBSUD,
588     VPDPBSUDS,
589     VPDPBUUD,
590     VPDPBUUDS,
591     VPDPBSSD,
592     VPDPBSSDS,
593 
594     // Compress and expand.
595     COMPRESS,
596     EXPAND,
597 
598     // Bits shuffle
599     VPSHUFBITQMB,
600 
601     // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
602     SINT_TO_FP_RND,
603     UINT_TO_FP_RND,
604     SCALAR_SINT_TO_FP,
605     SCALAR_UINT_TO_FP,
606     SCALAR_SINT_TO_FP_RND,
607     SCALAR_UINT_TO_FP_RND,
608 
609     // Vector float/double to signed/unsigned integer.
610     CVTP2SI,
611     CVTP2UI,
612     CVTP2SI_RND,
613     CVTP2UI_RND,
614     // Scalar float/double to signed/unsigned integer.
615     CVTS2SI,
616     CVTS2UI,
617     CVTS2SI_RND,
618     CVTS2UI_RND,
619 
620     // Vector float/double to signed/unsigned integer with truncation.
621     CVTTP2SI,
622     CVTTP2UI,
623     CVTTP2SI_SAE,
624     CVTTP2UI_SAE,
625     // Scalar float/double to signed/unsigned integer with truncation.
626     CVTTS2SI,
627     CVTTS2UI,
628     CVTTS2SI_SAE,
629     CVTTS2UI_SAE,
630 
631     // Vector signed/unsigned integer to float/double.
632     CVTSI2P,
633     CVTUI2P,
634 
635     // Masked versions of above. Used for v2f64->v4f32.
636     // SRC, PASSTHRU, MASK
637     MCVTP2SI,
638     MCVTP2UI,
639     MCVTTP2SI,
640     MCVTTP2UI,
641     MCVTSI2P,
642     MCVTUI2P,
643 
644     // Vector float to bfloat16.
645     // Convert TWO packed single data to one packed BF16 data
646     CVTNE2PS2BF16,
647     // Convert packed single data to packed BF16 data
648     CVTNEPS2BF16,
649     // Masked version of above.
650     // SRC, PASSTHRU, MASK
651     MCVTNEPS2BF16,
652 
653     // Dot product of BF16 pairs to accumulated into
654     // packed single precision.
655     DPBF16PS,
656 
657     // A stack checking function call. On Windows it's _chkstk call.
658     DYN_ALLOCA,
659 
660     // For allocating variable amounts of stack space when using
661     // segmented stacks. Check if the current stacklet has enough space, and
662     // falls back to heap allocation if not.
663     SEG_ALLOCA,
664 
665     // For allocating stack space when using stack clash protector.
666     // Allocation is performed by block, and each block is probed.
667     PROBED_ALLOCA,
668 
669     // Memory barriers.
670     MFENCE,
671 
672     // Get a random integer and indicate whether it is valid in CF.
673     RDRAND,
674 
675     // Get a NIST SP800-90B & C compliant random integer and
676     // indicate whether it is valid in CF.
677     RDSEED,
678 
679     // Protection keys
680     // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
681     // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
682     // value for ECX.
683     RDPKRU,
684     WRPKRU,
685 
686     // SSE42 string comparisons.
687     // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
688     // will emit one or two instructions based on which results are used. If
689     // flags and index/mask this allows us to use a single instruction since
690     // we won't have to pick and opcode for flags. Instead we can rely on the
691     // DAG to CSE everything and decide at isel.
692     PCMPISTR,
693     PCMPESTR,
694 
695     // Test if in transactional execution.
696     XTEST,
697 
698     // ERI instructions.
699     RSQRT28,
700     RSQRT28_SAE,
701     RSQRT28S,
702     RSQRT28S_SAE,
703     RCP28,
704     RCP28_SAE,
705     RCP28S,
706     RCP28S_SAE,
707     EXP2,
708     EXP2_SAE,
709 
710     // Conversions between float and half-float.
711     CVTPS2PH,
712     CVTPS2PH_SAE,
713     CVTPH2PS,
714     CVTPH2PS_SAE,
715 
716     // Masked version of above.
717     // SRC, RND, PASSTHRU, MASK
718     MCVTPS2PH,
719     MCVTPS2PH_SAE,
720 
721     // Galois Field Arithmetic Instructions
722     GF2P8AFFINEINVQB,
723     GF2P8AFFINEQB,
724     GF2P8MULB,
725 
726     // LWP insert record.
727     LWPINS,
728 
729     // User level wait
730     UMWAIT,
731     TPAUSE,
732 
733     // Enqueue Stores Instructions
734     ENQCMD,
735     ENQCMDS,
736 
737     // For avx512-vp2intersect
738     VP2INTERSECT,
739 
740     // User level interrupts - testui
741     TESTUI,
742 
743     // Perform an FP80 add after changing precision control in FPCW.
744     FP80_ADD,
745 
746     /// X86 strict FP compare instructions.
747     STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
748     STRICT_FCMPS,
749 
750     // Vector packed double/float comparison.
751     STRICT_CMPP,
752 
753     /// Vector comparison generating mask bits for fp and
754     /// integer signed and unsigned data types.
755     STRICT_CMPM,
756 
757     // Vector float/double to signed/unsigned integer with truncation.
758     STRICT_CVTTP2SI,
759     STRICT_CVTTP2UI,
760 
761     // Vector FP extend.
762     STRICT_VFPEXT,
763 
764     // Vector FP round.
765     STRICT_VFPROUND,
766 
767     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
768     // Also used by the legacy (V)ROUND intrinsics where we mask out the
769     // scaling part of the immediate.
770     STRICT_VRNDSCALE,
771 
772     // Vector signed/unsigned integer to float/double.
773     STRICT_CVTSI2P,
774     STRICT_CVTUI2P,
775 
776     // Strict FMA nodes.
777     STRICT_FNMADD,
778     STRICT_FMSUB,
779     STRICT_FNMSUB,
780 
781     // Conversions between float and half-float.
782     STRICT_CVTPS2PH,
783     STRICT_CVTPH2PS,
784 
785     // Perform an FP80 add after changing precision control in FPCW.
786     STRICT_FP80_ADD,
787 
788     // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
789     // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
790 
791     // Compare and swap.
792     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
793     LCMPXCHG8_DAG,
794     LCMPXCHG16_DAG,
795     LCMPXCHG16_SAVE_RBX_DAG,
796 
797     /// LOCK-prefixed arithmetic read-modify-write instructions.
798     /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
799     LADD,
800     LSUB,
801     LOR,
802     LXOR,
803     LAND,
804     LBTS,
805     LBTC,
806     LBTR,
807     LBTS_RM,
808     LBTC_RM,
809     LBTR_RM,
810 
811     /// RAO arithmetic instructions.
812     /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
813     AADD,
814     AOR,
815     AXOR,
816     AAND,
817 
818     // Load, scalar_to_vector, and zero extend.
819     VZEXT_LOAD,
820 
821     // extract_vector_elt, store.
822     VEXTRACT_STORE,
823 
824     // scalar broadcast from memory.
825     VBROADCAST_LOAD,
826 
827     // subvector broadcast from memory.
828     SUBV_BROADCAST_LOAD,
829 
830     // Store FP control word into i16 memory.
831     FNSTCW16m,
832 
833     // Load FP control word from i16 memory.
834     FLDCW16m,
835 
836     /// This instruction implements FP_TO_SINT with the
837     /// integer destination in memory and a FP reg source.  This corresponds
838     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
839     /// has two inputs (token chain and address) and two outputs (int value
840     /// and token chain). Memory VT specifies the type to store to.
841     FP_TO_INT_IN_MEM,
842 
843     /// This instruction implements SINT_TO_FP with the
844     /// integer source in memory and FP reg result.  This corresponds to the
845     /// X86::FILD*m instructions. It has two inputs (token chain and address)
846     /// and two outputs (FP value and token chain). The integer source type is
847     /// specified by the memory VT.
848     FILD,
849 
850     /// This instruction implements a fp->int store from FP stack
851     /// slots. This corresponds to the fist instruction. It takes a
852     /// chain operand, value to store, address, and glue. The memory VT
853     /// specifies the type to store as.
854     FIST,
855 
856     /// This instruction implements an extending load to FP stack slots.
857     /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
858     /// operand, and ptr to load from. The memory VT specifies the type to
859     /// load from.
860     FLD,
861 
862     /// This instruction implements a truncating store from FP stack
863     /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
864     /// chain operand, value to store, address, and glue. The memory VT
865     /// specifies the type to store as.
866     FST,
867 
868     /// These instructions grab the address of the next argument
869     /// from a va_list. (reads and modifies the va_list in memory)
870     VAARG_64,
871     VAARG_X32,
872 
873     // Vector truncating store with unsigned/signed saturation
874     VTRUNCSTOREUS,
875     VTRUNCSTORES,
876     // Vector truncating masked store with unsigned/signed saturation
877     VMTRUNCSTOREUS,
878     VMTRUNCSTORES,
879 
880     // X86 specific gather and scatter
881     MGATHER,
882     MSCATTER,
883 
884     // Key locker nodes that produce flags.
885     AESENC128KL,
886     AESDEC128KL,
887     AESENC256KL,
888     AESDEC256KL,
889     AESENCWIDE128KL,
890     AESDECWIDE128KL,
891     AESENCWIDE256KL,
892     AESDECWIDE256KL,
893 
894     /// Compare and Add if Condition is Met. Compare value in operand 2 with
895     /// value in memory of operand 1. If condition of operand 4 is met, add value
896     /// operand 3 to m32 and write new value in operand 1. Operand 2 is
897     /// always updated with the original value from operand 1.
898     CMPCCXADD,
899 
900     // Save xmm argument registers to the stack, according to %al. An operator
901     // is needed so that this can be expanded with control flow.
902     VASTART_SAVE_XMM_REGS,
903 
904     // WARNING: Do not add anything in the end unless you want the node to
905     // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
906     // opcodes will be thought as target memory ops!
907   };
908   } // end namespace X86ISD
909 
910   namespace X86 {
911     /// Current rounding mode is represented in bits 11:10 of FPSR. These
912     /// values are same as corresponding constants for rounding mode used
913     /// in glibc.
914     enum RoundingMode {
915       rmToNearest   = 0,        // FE_TONEAREST
916       rmDownward    = 1 << 10,  // FE_DOWNWARD
917       rmUpward      = 2 << 10,  // FE_UPWARD
918       rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
919       rmMask        = 3 << 10   // Bit mask selecting rounding mode
920     };
921   }
922 
923   /// Define some predicates that are used for node matching.
924   namespace X86 {
925     /// Returns true if Elt is a constant zero or floating point constant +0.0.
926     bool isZeroNode(SDValue Elt);
927 
928     /// Returns true of the given offset can be
929     /// fit into displacement field of the instruction.
930     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
931                                       bool hasSymbolicDisplacement);
932 
933     /// Determines whether the callee is required to pop its
934     /// own arguments. Callee pop is necessary to support tail calls.
935     bool isCalleePop(CallingConv::ID CallingConv,
936                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
937 
938     /// If Op is a constant whose elements are all the same constant or
939     /// undefined, return true and return the constant value in \p SplatVal.
940     /// If we have undef bits that don't cover an entire element, we treat these
941     /// as zero if AllowPartialUndefs is set, else we fail and return false.
942     bool isConstantSplat(SDValue Op, APInt &SplatVal,
943                          bool AllowPartialUndefs = true);
944 
945     /// Check if Op is a load operation that could be folded into some other x86
946     /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
947     bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
948                      bool AssumeSingleUse = false);
949 
950     /// Check if Op is a load operation that could be folded into a vector splat
951     /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
952     bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
953                                          const X86Subtarget &Subtarget,
954                                          bool AssumeSingleUse = false);
955 
956     /// Check if Op is a value that could be used to fold a store into some
957     /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
958     bool mayFoldIntoStore(SDValue Op);
959 
960     /// Check if Op is an operation that could be folded into a zero extend x86
961     /// instruction.
962     bool mayFoldIntoZeroExtend(SDValue Op);
963   } // end namespace X86
964 
965   //===--------------------------------------------------------------------===//
966   //  X86 Implementation of the TargetLowering interface
967   class X86TargetLowering final : public TargetLowering {
968   public:
969     explicit X86TargetLowering(const X86TargetMachine &TM,
970                                const X86Subtarget &STI);
971 
972     unsigned getJumpTableEncoding() const override;
973     bool useSoftFloat() const override;
974 
975     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
976                                ArgListTy &Args) const override;
977 
getScalarShiftAmountTy(const DataLayout &,EVT VT)978     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
979       return MVT::i8;
980     }
981 
982     const MCExpr *
983     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
984                               const MachineBasicBlock *MBB, unsigned uid,
985                               MCContext &Ctx) const override;
986 
987     /// Returns relocation base for the given PIC jumptable.
988     SDValue getPICJumpTableRelocBase(SDValue Table,
989                                      SelectionDAG &DAG) const override;
990     const MCExpr *
991     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
992                                  unsigned JTI, MCContext &Ctx) const override;
993 
994     /// Return the desired alignment for ByVal aggregate
995     /// function arguments in the caller parameter area. For X86, aggregates
996     /// that contains are placed at 16-byte boundaries while the rest are at
997     /// 4-byte boundaries.
998     uint64_t getByValTypeAlignment(Type *Ty,
999                                    const DataLayout &DL) const override;
1000 
1001     EVT getOptimalMemOpType(const MemOp &Op,
1002                             const AttributeList &FuncAttributes) const override;
1003 
1004     /// Returns true if it's safe to use load / store of the
1005     /// specified type to expand memcpy / memset inline. This is mostly true
1006     /// for all types except for some special cases. For example, on X86
1007     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1008     /// also does type conversion. Note the specified type doesn't have to be
1009     /// legal as the hook is used before type legalization.
1010     bool isSafeMemOpType(MVT VT) const override;
1011 
1012     bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1013 
1014     /// Returns true if the target allows unaligned memory accesses of the
1015     /// specified type. Returns whether it is "fast" in the last argument.
1016     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1017                                         MachineMemOperand::Flags Flags,
1018                                         unsigned *Fast) const override;
1019 
1020     /// This function returns true if the memory access is aligned or if the
1021     /// target allows this specific unaligned memory access. If the access is
1022     /// allowed, the optional final parameter returns a relative speed of the
1023     /// access (as defined by the target).
1024     bool allowsMemoryAccess(
1025         LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1026         Align Alignment,
1027         MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1028         unsigned *Fast = nullptr) const override;
1029 
allowsMemoryAccess(LLVMContext & Context,const DataLayout & DL,EVT VT,const MachineMemOperand & MMO,unsigned * Fast)1030     bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1031                             const MachineMemOperand &MMO,
1032                             unsigned *Fast) const {
1033       return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1034                                 MMO.getAlign(), MMO.getFlags(), Fast);
1035     }
1036 
1037     /// Provide custom lowering hooks for some operations.
1038     ///
1039     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1040 
1041     /// Replace the results of node with an illegal result
1042     /// type with new values built out of custom code.
1043     ///
1044     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1045                             SelectionDAG &DAG) const override;
1046 
1047     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1048 
1049     /// Return true if the target has native support for
1050     /// the specified value type and it is 'desirable' to use the type for the
1051     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1052     /// instruction encodings are longer and some i16 instructions are slow.
1053     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1054 
1055     /// Return true if the target has native support for the
1056     /// specified value type and it is 'desirable' to use the type. e.g. On x86
1057     /// i16 is legal, but undesirable since i16 instruction encodings are longer
1058     /// and some i16 instructions are slow.
1059     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1060 
1061     /// Return the newly negated expression if the cost is not expensive and
1062     /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
1063     /// do the negation.
1064     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1065                                  bool LegalOperations, bool ForCodeSize,
1066                                  NegatibleCost &Cost,
1067                                  unsigned Depth) const override;
1068 
1069     MachineBasicBlock *
1070     EmitInstrWithCustomInserter(MachineInstr &MI,
1071                                 MachineBasicBlock *MBB) const override;
1072 
1073     /// This method returns the name of a target specific DAG node.
1074     const char *getTargetNodeName(unsigned Opcode) const override;
1075 
1076     /// Do not merge vector stores after legalization because that may conflict
1077     /// with x86-specific store splitting optimizations.
mergeStoresAfterLegalization(EVT MemVT)1078     bool mergeStoresAfterLegalization(EVT MemVT) const override {
1079       return !MemVT.isVector();
1080     }
1081 
1082     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1083                           const MachineFunction &MF) const override;
1084 
1085     bool isCheapToSpeculateCttz(Type *Ty) const override;
1086 
1087     bool isCheapToSpeculateCtlz(Type *Ty) const override;
1088 
1089     bool isCtlzFast() const override;
1090 
1091     bool hasBitPreservingFPLogic(EVT VT) const override;
1092 
isMultiStoresCheaperThanBitsMerge(EVT LTy,EVT HTy)1093     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1094       // If the pair to store is a mixture of float and int values, we will
1095       // save two bitwise instructions and one float-to-int instruction and
1096       // increase one store instruction. There is potentially a more
1097       // significant benefit because it avoids the float->int domain switch
1098       // for input value. So It is more likely a win.
1099       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1100           (LTy.isInteger() && HTy.isFloatingPoint()))
1101         return true;
1102       // If the pair only contains int values, we will save two bitwise
1103       // instructions and increase one store instruction (costing one more
1104       // store buffer). Since the benefit is more blurred so we leave
1105       // such pair out until we get testcase to prove it is a win.
1106       return false;
1107     }
1108 
1109     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1110 
1111     bool hasAndNotCompare(SDValue Y) const override;
1112 
1113     bool hasAndNot(SDValue Y) const override;
1114 
1115     bool hasBitTest(SDValue X, SDValue Y) const override;
1116 
1117     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1118         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1119         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1120         SelectionDAG &DAG) const override;
1121 
1122     bool preferScalarizeSplat(unsigned Opc) const override;
1123 
1124     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1125                                            CombineLevel Level) const override;
1126 
1127     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1128 
1129     bool
shouldTransformSignedTruncationCheck(EVT XVT,unsigned KeptBits)1130     shouldTransformSignedTruncationCheck(EVT XVT,
1131                                          unsigned KeptBits) const override {
1132       // For vectors, we don't have a preference..
1133       if (XVT.isVector())
1134         return false;
1135 
1136       auto VTIsOk = [](EVT VT) -> bool {
1137         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1138                VT == MVT::i64;
1139       };
1140 
1141       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1142       // XVT will be larger than KeptBitsVT.
1143       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1144       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1145     }
1146 
1147     ShiftLegalizationStrategy
1148     preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1149                                        unsigned ExpansionFactor) const override;
1150 
1151     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1152 
shouldConvertFpToSat(unsigned Op,EVT FPVT,EVT VT)1153     bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1154       // Converting to sat variants holds little benefit on X86 as we will just
1155       // need to saturate the value back using fp arithmatic.
1156       return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1157     }
1158 
convertSetCCLogicToBitwiseLogic(EVT VT)1159     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1160       return VT.isScalarInteger();
1161     }
1162 
1163     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1164     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1165 
1166     /// Return the value type to use for ISD::SETCC.
1167     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1168                            EVT VT) const override;
1169 
1170     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1171                                       const APInt &DemandedElts,
1172                                       TargetLoweringOpt &TLO) const override;
1173 
1174     /// Determine which of the bits specified in Mask are known to be either
1175     /// zero or one and return them in the KnownZero/KnownOne bitsets.
1176     void computeKnownBitsForTargetNode(const SDValue Op,
1177                                        KnownBits &Known,
1178                                        const APInt &DemandedElts,
1179                                        const SelectionDAG &DAG,
1180                                        unsigned Depth = 0) const override;
1181 
1182     /// Determine the number of bits in the operation that are sign bits.
1183     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1184                                              const APInt &DemandedElts,
1185                                              const SelectionDAG &DAG,
1186                                              unsigned Depth) const override;
1187 
1188     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1189                                                  const APInt &DemandedElts,
1190                                                  APInt &KnownUndef,
1191                                                  APInt &KnownZero,
1192                                                  TargetLoweringOpt &TLO,
1193                                                  unsigned Depth) const override;
1194 
1195     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1196                                                     const APInt &DemandedElts,
1197                                                     unsigned MaskIndex,
1198                                                     TargetLoweringOpt &TLO,
1199                                                     unsigned Depth) const;
1200 
1201     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1202                                            const APInt &DemandedBits,
1203                                            const APInt &DemandedElts,
1204                                            KnownBits &Known,
1205                                            TargetLoweringOpt &TLO,
1206                                            unsigned Depth) const override;
1207 
1208     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1209         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1210         SelectionDAG &DAG, unsigned Depth) const override;
1211 
1212     bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1213         SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1214         bool PoisonOnly, unsigned Depth) const override;
1215 
1216     bool canCreateUndefOrPoisonForTargetNode(
1217         SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1218         bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1219 
1220     bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1221                                    APInt &UndefElts, const SelectionDAG &DAG,
1222                                    unsigned Depth) const override;
1223 
isTargetCanonicalConstantNode(SDValue Op)1224     bool isTargetCanonicalConstantNode(SDValue Op) const override {
1225       // Peek through bitcasts/extracts/inserts to see if we have a broadcast
1226       // vector from memory.
1227       while (Op.getOpcode() == ISD::BITCAST ||
1228              Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1229              (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1230               Op.getOperand(0).isUndef()))
1231         Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1232 
1233       return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1234              TargetLowering::isTargetCanonicalConstantNode(Op);
1235     }
1236 
1237     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1238 
1239     SDValue unwrapAddress(SDValue N) const override;
1240 
1241     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1242 
1243     bool ExpandInlineAsm(CallInst *CI) const override;
1244 
1245     ConstraintType getConstraintType(StringRef Constraint) const override;
1246 
1247     /// Examine constraint string and operand type and determine a weight value.
1248     /// The operand object must already have been set up with the operand type.
1249     ConstraintWeight
1250       getSingleConstraintMatchWeight(AsmOperandInfo &info,
1251                                      const char *constraint) const override;
1252 
1253     const char *LowerXConstraint(EVT ConstraintVT) const override;
1254 
1255     /// Lower the specified operand into the Ops vector. If it is invalid, don't
1256     /// add anything to Ops. If hasMemory is true it means one of the asm
1257     /// constraints of the inline asm instruction being processed is 'm'.
1258     void LowerAsmOperandForConstraint(SDValue Op,
1259                                       std::string &Constraint,
1260                                       std::vector<SDValue> &Ops,
1261                                       SelectionDAG &DAG) const override;
1262 
1263     unsigned
getInlineAsmMemConstraint(StringRef ConstraintCode)1264     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1265       if (ConstraintCode == "v")
1266         return InlineAsm::Constraint_v;
1267       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1268     }
1269 
1270     /// Handle Lowering flag assembly outputs.
1271     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1272                                         const SDLoc &DL,
1273                                         const AsmOperandInfo &Constraint,
1274                                         SelectionDAG &DAG) const override;
1275 
1276     /// Given a physical register constraint
1277     /// (e.g. {edx}), return the register number and the register class for the
1278     /// register.  This should only be used for C_Register constraints.  On
1279     /// error, this returns a register number of 0.
1280     std::pair<unsigned, const TargetRegisterClass *>
1281     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1282                                  StringRef Constraint, MVT VT) const override;
1283 
1284     /// Return true if the addressing mode represented
1285     /// by AM is legal for this target, for a load/store of the specified type.
1286     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1287                                Type *Ty, unsigned AS,
1288                                Instruction *I = nullptr) const override;
1289 
1290     /// Return true if the specified immediate is legal
1291     /// icmp immediate, that is the target has icmp instructions which can
1292     /// compare a register against the immediate without having to materialize
1293     /// the immediate into a register.
1294     bool isLegalICmpImmediate(int64_t Imm) const override;
1295 
1296     /// Return true if the specified immediate is legal
1297     /// add immediate, that is the target has add instructions which can
1298     /// add a register and the immediate without having to materialize
1299     /// the immediate into a register.
1300     bool isLegalAddImmediate(int64_t Imm) const override;
1301 
1302     bool isLegalStoreImmediate(int64_t Imm) const override;
1303 
1304     /// This is used to enable splatted operand transforms for vector shifts
1305     /// and vector funnel shifts.
1306     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1307 
1308     /// Add x86-specific opcodes to the default list.
1309     bool isBinOp(unsigned Opcode) const override;
1310 
1311     /// Returns true if the opcode is a commutative binary operation.
1312     bool isCommutativeBinOp(unsigned Opcode) const override;
1313 
1314     /// Return true if it's free to truncate a value of
1315     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1316     /// register EAX to i16 by referencing its sub-register AX.
1317     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1318     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1319 
1320     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1321 
1322     /// Return true if any actual instruction that defines a
1323     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1324     /// register. This does not necessarily include registers defined in
1325     /// unknown ways, such as incoming arguments, or copies from unknown
1326     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1327     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1328     /// all instructions that define 32-bit values implicitly zero-extend the
1329     /// result out to 64 bits.
1330     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1331     bool isZExtFree(EVT VT1, EVT VT2) const override;
1332     bool isZExtFree(SDValue Val, EVT VT2) const override;
1333 
1334     bool shouldSinkOperands(Instruction *I,
1335                             SmallVectorImpl<Use *> &Ops) const override;
1336     bool shouldConvertPhiType(Type *From, Type *To) const override;
1337 
1338     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1339     /// extend node) is profitable.
1340     bool isVectorLoadExtDesirable(SDValue) const override;
1341 
1342     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1343     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1344     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1345     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1346                                     EVT VT) const override;
1347 
1348     /// Return true if it's profitable to narrow
1349     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1350     /// from i32 to i8 but not from i32 to i16.
1351     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1352 
1353     bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1354                                               EVT VT) const override;
1355 
1356     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1357     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1358     /// true and stores the intrinsic information into the IntrinsicInfo that was
1359     /// passed to the function.
1360     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1361                             MachineFunction &MF,
1362                             unsigned Intrinsic) const override;
1363 
1364     /// Returns true if the target can instruction select the
1365     /// specified FP immediate natively. If false, the legalizer will
1366     /// materialize the FP immediate as a load from a constant pool.
1367     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1368                       bool ForCodeSize) const override;
1369 
1370     /// Targets can use this to indicate that they only support *some*
1371     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1372     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1373     /// be legal.
1374     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1375 
1376     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1377     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1378     /// constant pool entry.
1379     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1380 
1381     /// Returns true if lowering to a jump table is allowed.
1382     bool areJTsAllowed(const Function *Fn) const override;
1383 
1384     MVT getPreferredSwitchConditionType(LLVMContext &Context,
1385                                         EVT ConditionVT) const override;
1386 
1387     /// If true, then instruction selection should
1388     /// seek to shrink the FP constant of the specified type to a smaller type
1389     /// in order to save space and / or reduce runtime.
1390     bool ShouldShrinkFPConstant(EVT VT) const override;
1391 
1392     /// Return true if we believe it is correct and profitable to reduce the
1393     /// load node to a smaller type.
1394     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1395                                EVT NewVT) const override;
1396 
1397     /// Return true if the specified scalar FP type is computed in an SSE
1398     /// register, not on the X87 floating point stack.
1399     bool isScalarFPTypeInSSEReg(EVT VT) const;
1400 
1401     /// Returns true if it is beneficial to convert a load of a constant
1402     /// to just the constant itself.
1403     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1404                                            Type *Ty) const override;
1405 
1406     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1407 
1408     bool convertSelectOfConstantsToMath(EVT VT) const override;
1409 
1410     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1411                                 SDValue C) const override;
1412 
1413     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1414     /// with this index.
1415     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1416                                  unsigned Index) const override;
1417 
1418     /// Scalar ops always have equal or better analysis/performance/power than
1419     /// the vector equivalent, so this always makes sense if the scalar op is
1420     /// supported.
shouldScalarizeBinop(SDValue)1421     bool shouldScalarizeBinop(SDValue) const override;
1422 
1423     /// Extract of a scalar FP value from index 0 of a vector is free.
1424     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1425       EVT EltVT = VT.getScalarType();
1426       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1427     }
1428 
1429     /// Overflow nodes should get combined/lowered to optimal instructions
1430     /// (they should allow eliminating explicit compares by getting flags from
1431     /// math ops).
1432     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1433                               bool MathUsed) const override;
1434 
storeOfVectorConstantIsCheap(EVT MemVT,unsigned NumElem,unsigned AddrSpace)1435     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1436                                       unsigned AddrSpace) const override {
1437       // If we can replace more than 2 scalar stores, there will be a reduction
1438       // in instructions even after we add a vector constant load.
1439       return NumElem > 2;
1440     }
1441 
1442     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1443                                  const SelectionDAG &DAG,
1444                                  const MachineMemOperand &MMO) const override;
1445 
1446     /// Intel processors have a unified instruction and data cache
getClearCacheBuiltinName()1447     const char * getClearCacheBuiltinName() const override {
1448       return nullptr; // nothing to do, move along.
1449     }
1450 
1451     Register getRegisterByName(const char* RegName, LLT VT,
1452                                const MachineFunction &MF) const override;
1453 
1454     /// If a physical register, this returns the register that receives the
1455     /// exception address on entry to an EH pad.
1456     Register
1457     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1458 
1459     /// If a physical register, this returns the register that receives the
1460     /// exception typeid on entry to a landing pad.
1461     Register
1462     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1463 
1464     bool needsFixedCatchObjects() const override;
1465 
1466     /// This method returns a target specific FastISel object,
1467     /// or null if the target does not support "fast" ISel.
1468     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1469                              const TargetLibraryInfo *libInfo) const override;
1470 
1471     /// If the target has a standard location for the stack protector cookie,
1472     /// returns the address of that location. Otherwise, returns nullptr.
1473     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1474 
1475     bool useLoadStackGuardNode() const override;
1476     bool useStackGuardXorFP() const override;
1477     void insertSSPDeclarations(Module &M) const override;
1478     Value *getSDagStackGuard(const Module &M) const override;
1479     Function *getSSPStackGuardCheck(const Module &M) const override;
1480     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1481                                 const SDLoc &DL) const override;
1482 
1483 
1484     /// Return true if the target stores SafeStack pointer at a fixed offset in
1485     /// some non-standard address space, and populates the address space and
1486     /// offset as appropriate.
1487     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1488 
1489     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1490                                           SDValue Chain, SDValue Pointer,
1491                                           MachinePointerInfo PtrInfo,
1492                                           Align Alignment,
1493                                           SelectionDAG &DAG) const;
1494 
1495     /// Customize the preferred legalization strategy for certain types.
1496     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1497 
softPromoteHalfType()1498     bool softPromoteHalfType() const override { return true; }
1499 
1500     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1501                                       EVT VT) const override;
1502 
1503     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1504                                            CallingConv::ID CC,
1505                                            EVT VT) const override;
1506 
1507     unsigned getVectorTypeBreakdownForCallingConv(
1508         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1509         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1510 
1511     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1512 
1513     bool supportSwiftError() const override;
1514 
supportKCFIBundles()1515     bool supportKCFIBundles() const override { return true; }
1516 
1517     bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1518     bool hasInlineStackProbe(const MachineFunction &MF) const override;
1519     StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1520 
1521     unsigned getStackProbeSize(const MachineFunction &MF) const;
1522 
hasVectorBlend()1523     bool hasVectorBlend() const override { return true; }
1524 
getMaxSupportedInterleaveFactor()1525     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1526 
1527     bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1528                                  unsigned OpNo) const override;
1529 
1530     /// Lower interleaved load(s) into target specific
1531     /// instructions/intrinsics.
1532     bool lowerInterleavedLoad(LoadInst *LI,
1533                               ArrayRef<ShuffleVectorInst *> Shuffles,
1534                               ArrayRef<unsigned> Indices,
1535                               unsigned Factor) const override;
1536 
1537     /// Lower interleaved store(s) into target specific
1538     /// instructions/intrinsics.
1539     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1540                                unsigned Factor) const override;
1541 
1542     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1543                                    SDValue Addr, SelectionDAG &DAG)
1544                                    const override;
1545 
1546     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1547 
getTypeToTransformTo(LLVMContext & Context,EVT VT)1548     EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1549       if (VT == MVT::f80)
1550         return EVT::getIntegerVT(Context, 96);
1551       return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1552     }
1553 
1554   protected:
1555     std::pair<const TargetRegisterClass *, uint8_t>
1556     findRepresentativeClass(const TargetRegisterInfo *TRI,
1557                             MVT VT) const override;
1558 
1559   private:
1560     /// Keep a reference to the X86Subtarget around so that we can
1561     /// make the right decision when generating code for different targets.
1562     const X86Subtarget &Subtarget;
1563 
1564     /// A list of legal FP immediates.
1565     std::vector<APFloat> LegalFPImmediates;
1566 
1567     /// Indicate that this x86 target can instruction
1568     /// select the specified FP immediate natively.
addLegalFPImmediate(const APFloat & Imm)1569     void addLegalFPImmediate(const APFloat& Imm) {
1570       LegalFPImmediates.push_back(Imm);
1571     }
1572 
1573     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1574                             CallingConv::ID CallConv, bool isVarArg,
1575                             const SmallVectorImpl<ISD::InputArg> &Ins,
1576                             const SDLoc &dl, SelectionDAG &DAG,
1577                             SmallVectorImpl<SDValue> &InVals,
1578                             uint32_t *RegMask) const;
1579     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1580                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1581                              const SDLoc &dl, SelectionDAG &DAG,
1582                              const CCValAssign &VA, MachineFrameInfo &MFI,
1583                              unsigned i) const;
1584     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1585                              const SDLoc &dl, SelectionDAG &DAG,
1586                              const CCValAssign &VA,
1587                              ISD::ArgFlagsTy Flags, bool isByval) const;
1588 
1589     // Call lowering helpers.
1590 
1591     /// Check whether the call is eligible for tail call optimization. Targets
1592     /// that want to do tail call optimization should implement this function.
1593     bool IsEligibleForTailCallOptimization(
1594         SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
1595         bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
1596         const SmallVectorImpl<SDValue> &OutVals,
1597         const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
1598     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1599                                     SDValue Chain, bool IsTailCall,
1600                                     bool Is64Bit, int FPDiff,
1601                                     const SDLoc &dl) const;
1602 
1603     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1604                                          SelectionDAG &DAG) const;
1605 
1606     unsigned getAddressSpace() const;
1607 
1608     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1609                             SDValue &Chain) const;
1610     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1611 
1612     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1613     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1614     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1615     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1616 
1617     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1618                                   const unsigned char OpFlags = 0) const;
    // Address-like node lowering helpers. Each takes the value being lowered
    // plus the SelectionDAG and returns an SDValue.
    // NOTE(review): the names suggest one helper per node kind (constant
    // pool, block address, global address, global TLS address, external
    // symbol); confirm against the implementation.
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1624 
    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    ///
    /// \p ForCall tells the helper whether the node is being created for a
    /// call (true) or for another use (false).
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;
1629 
    // Per-operation lowering helpers. Every helper takes the value being
    // lowered plus the SelectionDAG and returns an SDValue. The one exception
    // in shape is LowerWin64_FP_TO_INT128, which additionally takes a Chain
    // by non-const reference and can therefore update it.
    // NOTE(review): each name suggests the operation it handles (conversions,
    // setcc/select/branch, varargs, frame and return address queries, EH and
    // SjLj, trampolines, rounding mode, Win64 i128 helpers, intrinsics, FP
    // extend/round); confirm the mapping against the dispatch code in the
    // implementation file.
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1665 
    // Overrides of the base-class entry points for lowering a function's
    // formal arguments, a call, and a return.
    //
    // LowerFormalArguments and LowerCall receive \p InVals by non-const
    // reference, so they can append the lowered incoming values to it;
    // LowerReturn receives the outgoing values read-only through \p Outs and
    // \p OutVals. All three return an SDValue.
    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;
1678 
supportSplitCSR(MachineFunction * MF)1679     bool supportSplitCSR(MachineFunction *MF) const override {
1680       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1681           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1682     }
    // Split-CSR overrides. initializeSplitCSR is given the entry block;
    // insertCopiesSplitCSR is given the entry block and a read-only list of
    // exit blocks.
    // NOTE(review): presumably these only matter for functions for which
    // supportSplitCSR() returns true -- confirm against the base-class
    // contract.
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1687 
    /// Override that receives a value \p Val and an output array \p Parts of
    /// \p NumParts entries of type \p PartVT, with an optional calling
    /// convention \p CC. Returns a bool.
    /// NOTE(review): presumably the return value reports whether the target
    /// performed the split itself -- confirm against the base-class contract.
    bool splitValueIntoRegisterParts(
        SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
        const override;

    /// Counterpart override that receives a read-only array \p Parts of
    /// \p NumParts entries of type \p PartVT and the desired \p ValueVT, with
    /// an optional calling convention \p CC, and returns an SDValue.
    /// NOTE(review): presumably an empty SDValue means "not handled here" --
    /// confirm against the base-class contract.
    SDValue joinRegisterPartsIntoValue(
        SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts,
        unsigned NumParts, MVT PartVT, EVT ValueVT,
        std::optional<CallingConv::ID> CC) const override;
1697 
    // Overrides of base-class queries about returns and tail calls. Their
    // contracts are defined by the base class; only the shapes are visible
    // here.

    /// Takes a node and a \p Chain by non-const reference (so the chain can be
    /// updated) and returns a bool.
    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    /// Returns a bool for the given call instruction.
    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    /// Returns an EVT for the given \p VT and extension kind.
    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    /// Returns a bool for the given calling convention, variadic flag and
    /// outgoing return values \p Outs.
    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    /// Returns a pointer to physical registers for calling convention \p CC.
    /// NOTE(review): how the returned array is terminated or sized is not
    /// visible here -- check the base-class documentation.
    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1711 
    // Atomic lowering hooks. The shouldExpand* functions return an
    // AtomicExpansionKind for the given instruction. Note that
    // shouldExpandLogicAtomicRMWInIR is not marked override, unlike its
    // neighbours, so it is a helper of this class rather than a base-class
    // hook.
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    // Emission hooks for an atomicrmw instruction.
    // NOTE(review): by their names these emit target intrinsics for the
    // bit-test and compare-arithmetic forms -- confirm in the definitions.
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    // Returns a LoadInst pointer for the given atomicrmw.
    // NOTE(review): presumably nullptr means no replacement was made.
    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    // Boolean queries on an atomic store / atomic load instruction.
    bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
    bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

    // Boolean query on a memory type; not an override.
    // NOTE(review): "CmpXchgNb" presumably refers to the cmpxchg8b/16b
    // instructions -- confirm in the definition.
    bool needsCmpXchgNb(Type *MemType) const;
1730 
    /// Boolean query on a value type, templated on the type of \p VT so that
    /// callers may pass different value-type representations.
    /// NOTE(review): presumably true when \p VT is a 16-bit float type that
    /// must be handled without native support -- confirm in the definition.
    template<typename T> bool isSoftFP16(T VT) const;
1732 
    /// SjLj helper taking an instruction, the block \p MBB, the dispatch block
    /// \p DispatchBB and an integer \p FI. Returns nothing.
    /// NOTE(review): \p FI is presumably a frame index -- confirm in the
    /// definition.
    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;
1735 
    // Utility function to emit the low-level va_arg code for X86-64.
    // Takes the instruction being expanded and its block, and returns a
    // MachineBasicBlock pointer.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1739 
    /// Helper that takes two instructions (\p MI1, \p MI2) and the block
    /// \p BB, and returns a MachineBasicBlock pointer.
    /// NOTE(review): by its name this lowers a cascaded pair of select
    /// instructions. The comment previously attached here described emitting
    /// the xmm register save portion of va_start, which does not match this
    /// declaration and appears to be stale -- confirm against the definition.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;
1744 
    // Instruction expansion helpers. Each takes one instruction and the block
    // \p BB and returns a MachineBasicBlock pointer.
    // NOTE(review): each name suggests the instruction it expands (select,
    // catchret, segmented-stack alloca, probed alloca, TLS address, TLS call,
    // indirect thunk); presumably the returned block is where emission
    // continues -- confirm against the implementation.
    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;
1765 
    // SjLj (setjmp/longjmp) emission helpers. Each takes one instruction and
    // the block \p MBB. All return a MachineBasicBlock pointer except
    // emitSetJmpShadowStackFix, which returns nothing.
    // NOTE(review): the "ShadowStackFix" helpers presumably adjust shadow
    // stack state around setjmp/longjmp -- confirm in the definitions.
    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;
1780 
    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    ///
    /// \p Op0 and \p Op1 are the compared operands and \p CC the generic
    /// condition code. \p X86CC is an output parameter (non-const reference);
    /// the flags value is the function's return value.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;
1786 
    /// Check if replacement of SQRT with RSQRT should be disabled.
    /// Returns a bool for the given operand.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    /// \p RefinementSteps and \p UseOneConstNR are non-const references and
    /// can therefore be written by this call; \p Reciprocal and \p Enabled are
    /// inputs.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    /// \p RefinementSteps is a non-const reference and can therefore be
    /// written by this call.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    /// NOTE(review): the unsigned result is presumably the minimum number of
    /// divisions sharing a divisor that makes this worthwhile -- confirm
    /// against the base-class documentation.
    unsigned combineRepeatedFPDivisors() const override;
1801 
    /// Override taking the node \p N and a constant \p Divisor; returns an
    /// SDValue. \p Created is a non-const list of nodes that this call can
    /// append to.
    /// NOTE(review): by its name this builds a replacement for a signed
    /// division by a power of two -- confirm against the base-class contract.
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;
1804   };
1805 
  namespace X86 {
    /// Factory returning a FastISel pointer, given the function lowering info
    /// and a (read-only) target library info.
    /// NOTE(review): ownership of the returned object is not expressed in the
    /// signature -- check the callers before changing how it is freed.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1810 
1811   // X86 specific Gather/Scatter nodes.
1812   // The class has the same order of operands as MaskedGatherScatterSDNode for
1813   // convenience.
1814   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1815   public:
1816     // This is a intended as a utility and should never be directly created.
1817     X86MaskedGatherScatterSDNode() = delete;
1818     ~X86MaskedGatherScatterSDNode() = delete;
1819 
getBasePtr()1820     const SDValue &getBasePtr() const { return getOperand(3); }
getIndex()1821     const SDValue &getIndex()   const { return getOperand(4); }
getMask()1822     const SDValue &getMask()    const { return getOperand(2); }
getScale()1823     const SDValue &getScale()   const { return getOperand(5); }
1824 
classof(const SDNode * N)1825     static bool classof(const SDNode *N) {
1826       return N->getOpcode() == X86ISD::MGATHER ||
1827              N->getOpcode() == X86ISD::MSCATTER;
1828     }
1829   };
1830 
1831   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1832   public:
getPassThru()1833     const SDValue &getPassThru() const { return getOperand(1); }
1834 
classof(const SDNode * N)1835     static bool classof(const SDNode *N) {
1836       return N->getOpcode() == X86ISD::MGATHER;
1837     }
1838   };
1839 
1840   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1841   public:
getValue()1842     const SDValue &getValue() const { return getOperand(1); }
1843 
classof(const SDNode * N)1844     static bool classof(const SDNode *N) {
1845       return N->getOpcode() == X86ISD::MSCATTER;
1846     }
1847   };
1848 
  /// Generate unpacklo/unpackhi shuffle mask.
  ///
  /// The mask for vector type \p VT is written into \p Mask. \p Lo selects
  /// between the unpacklo (true) and unpackhi (false) forms.
  /// NOTE(review): \p Unary presumably means both shuffle inputs are the same
  /// vector; whether \p Mask must be empty on entry is not visible here.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);
1852 
  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  ///
  /// The mask for vector type \p VT is written into \p Mask; \p Lo selects
  /// the "Lo" (true) or "Hi" (false) pattern shown above.
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1858 
1859 } // end namespace llvm
1860 
1861 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1862