1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
19 
20 namespace llvm {
21   class X86Subtarget;
22   class X86TargetMachine;
23 
24   namespace X86ISD {
25     // X86 Specific DAG Nodes
26   enum NodeType : unsigned {
27     // Start the numbering where the builtin ops leave off.
28     FIRST_NUMBER = ISD::BUILTIN_OP_END,
29 
30     /// Bit scan forward.
31     BSF,
32     /// Bit scan reverse.
33     BSR,
34 
35     /// X86 funnel/double shift i16 instructions. These correspond to
36     /// the X86::SHLDW and X86::SHRDW instructions, whose shift-amount
37     /// modulo rules differ from those of generic funnel shifts.
38     /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39     FSHL,
40     FSHR,
41 
42     /// Bitwise logical AND of floating point values. This corresponds
43     /// to X86::ANDPS or X86::ANDPD.
44     FAND,
45 
46     /// Bitwise logical OR of floating point values. This corresponds
47     /// to X86::ORPS or X86::ORPD.
48     FOR,
49 
50     /// Bitwise logical XOR of floating point values. This corresponds
51     /// to X86::XORPS or X86::XORPD.
52     FXOR,
53 
54     ///  Bitwise logical ANDNOT of floating point values. This
55     /// corresponds to X86::ANDNPS or X86::ANDNPD.
56     FANDN,
57 
58     /// These operations represent an abstract X86 call
59     /// instruction, which includes a bunch of information.  In particular the
60     /// operands of these nodes are:
61     ///
62     ///     #0 - The incoming token chain
63     ///     #1 - The callee
64     ///     #2 - The number of arg bytes the caller pushes on the stack.
65     ///     #3 - The number of arg bytes the callee pops off the stack.
66     ///     #4 - The value to pass in AL/AX/EAX (optional)
67     ///     #5 - The value to pass in DL/DX/EDX (optional)
68     ///
69     /// The result values of these nodes are:
70     ///
71     ///     #0 - The outgoing token chain
72     ///     #1 - The first register result value (optional)
73     ///     #2 - The second register result value (optional)
74     ///
75     CALL,
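    // A minimal construction sketch (illustrative only, not the exact code in
    // the call-lowering implementation): Chain, Callee and dl are assumed to
    // be in scope, and the byte-count operands, argument register copies and
    // glue are elided.
    //
    //   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    //   SDValue Ops[] = {Chain, Callee /*, byte counts, arg copies, glue */};
    //   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);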
76 
77     /// Same as call except it adds the NoTrack prefix.
78     NT_CALL,
79 
80     // Pseudo for an ObjC call that gets emitted together with a special
81     // marker instruction.
82     CALL_RVMARKER,
83 
84     /// X86 compare and logical compare instructions.
85     CMP,
86     FCMP,
87     COMI,
88     UCOMI,
89 
90     /// X86 bit-test instructions.
91     BT,
92 
93     /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94     /// operand, usually produced by a CMP instruction.
95     SETCC,
96 
97     /// X86 Select
98     SELECTS,
99 
100     // Same as SETCC except it's materialized with a SBB and the value is all
101     // ones or all zeros.
102     SETCC_CARRY, // R = carry_bit ? ~0 : 0
103 
104     /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105     /// Operands are two FP values to compare; result is a mask of
106     /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
107     FSETCC,
108 
109     /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
110     /// a version with SAE.
111     FSETCCM,
112     FSETCCM_SAE,
113 
114     /// X86 conditional moves. Operand 0 and operand 1 are the two values
115     /// to select from. Operand 2 is the condition code, and operand 3 is the
116     /// flag operand produced by a CMP or TEST instruction.
117     CMOV,
118 
119     /// X86 conditional branches. Operand 0 is the chain operand, operand 1
120     /// is the block to branch if condition is true, operand 2 is the
121     /// condition code, and operand 3 is the flag operand produced by a CMP
122     /// or TEST instruction.
123     BRCOND,
124 
125     /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126     /// operand 1 is the target address.
127     NT_BRIND,
128 
129     /// Return with a flag operand. Operand 0 is the chain operand, operand
130     /// 1 is the number of bytes of stack to pop.
131     RET_FLAG,
132 
133     /// Return from interrupt. Operand 0 is the number of bytes to pop.
134     IRET,
135 
136     /// Repeat fill, corresponds to X86::REP_STOSx.
137     REP_STOS,
138 
139     /// Repeat move, corresponds to X86::REP_MOVSx.
140     REP_MOVS,
141 
142     /// On Darwin, this node represents the result of the popl
143     /// at function entry, used for PIC code.
144     GlobalBaseReg,
145 
146     /// A wrapper node for TargetConstantPool, TargetJumpTable,
147     /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148     /// MCSymbol and TargetBlockAddress.
149     Wrapper,
150 
151     /// Special wrapper used under X86-64 PIC mode for RIP
152     /// relative displacements.
153     WrapperRIP,
154 
155     /// Copies a 64-bit value from an MMX vector to the low word
156     /// of an XMM vector, with the high word zero filled.
157     MOVQ2DQ,
158 
159     /// Copies a 64-bit value from the low word of an XMM vector
160     /// to an MMX vector.
161     MOVDQ2Q,
162 
163     /// Copies a 32-bit value from the low word of an MMX
164     /// vector to a GPR.
165     MMX_MOVD2W,
166 
167     /// Copies a GPR into the low 32-bit word of an MMX vector
168     /// and zeroes out the high word.
169     MMX_MOVW2D,
170 
171     /// Extract an 8-bit value from a vector and zero extend it to
172     /// i32, corresponds to X86::PEXTRB.
173     PEXTRB,
174 
175     /// Extract a 16-bit value from a vector and zero extend it to
176     /// i32, corresponds to X86::PEXTRW.
177     PEXTRW,
178 
179     /// Insert any element of a 4 x float vector into any element
180     /// of a destination 4 x float vector.
181     INSERTPS,
182 
183     /// Insert the lower 8 bits of a 32-bit value into a vector,
184     /// corresponds to X86::PINSRB.
185     PINSRB,
186 
187     /// Insert the lower 16 bits of a 32-bit value into a vector,
188     /// corresponds to X86::PINSRW.
189     PINSRW,
190 
191     /// Shuffle 16 8-bit values within a vector.
192     PSHUFB,
193 
194     /// Compute Sum of Absolute Differences.
195     PSADBW,
196     /// Compute Double Block Packed Sum-Absolute-Differences
197     DBPSADBW,
198 
199     /// Bitwise Logical AND NOT of Packed FP values.
200     ANDNP,
201 
202     /// Blend where the selector is an immediate.
203     BLENDI,
204 
205     /// Dynamic (non-constant condition) vector blend where only the sign bits
206     /// of the condition elements are used. This is used to enforce that the
207     /// condition mask is not valid for generic VSELECT optimizations. This
208     /// is also used to implement the intrinsics.
209     /// Operands are in VSELECT order: MASK, TRUE, FALSE
210     BLENDV,
211 
212     /// Combined add and sub on an FP vector.
213     ADDSUB,
214 
215     //  FP vector ops with rounding mode.
216     FADD_RND,
217     FADDS,
218     FADDS_RND,
219     FSUB_RND,
220     FSUBS,
221     FSUBS_RND,
222     FMUL_RND,
223     FMULS,
224     FMULS_RND,
225     FDIV_RND,
226     FDIVS,
227     FDIVS_RND,
228     FMAX_SAE,
229     FMAXS_SAE,
230     FMIN_SAE,
231     FMINS_SAE,
232     FSQRT_RND,
233     FSQRTS,
234     FSQRTS_RND,
235 
236     // FP vector get exponent.
237     FGETEXP,
238     FGETEXP_SAE,
239     FGETEXPS,
240     FGETEXPS_SAE,
241     // Extract Normalized Mantissas.
242     VGETMANT,
243     VGETMANT_SAE,
244     VGETMANTS,
245     VGETMANTS_SAE,
246     // FP Scale.
247     SCALEF,
248     SCALEF_RND,
249     SCALEFS,
250     SCALEFS_RND,
251 
252     // Unsigned Integer average.
253     AVG,
254 
255     /// Integer horizontal add/sub.
256     HADD,
257     HSUB,
258 
259     /// Floating point horizontal add/sub.
260     FHADD,
261     FHSUB,
262 
263     // Detect Conflicts Within a Vector
264     CONFLICT,
265 
266     /// Floating point max and min.
267     FMAX,
268     FMIN,
269 
270     /// Commutative FMIN and FMAX.
271     FMAXC,
272     FMINC,
273 
274     /// Scalar intrinsic floating point max and min.
275     FMAXS,
276     FMINS,
277 
278     /// Floating point reciprocal-sqrt and reciprocal approximation.
279     /// Note that these typically require refinement
280     /// in order to obtain suitable precision.
281     FRSQRT,
282     FRCP,
283 
284     // AVX-512 reciprocal approximations with a little more precision.
285     RSQRT14,
286     RSQRT14S,
287     RCP14,
288     RCP14S,
289 
290     // Thread Local Storage.
291     TLSADDR,
292 
293     // Thread Local Storage. A call to get the start address
294     // of the TLS block for the current module.
295     TLSBASEADDR,
296 
297     // Thread Local Storage. A call to an OS-provided
298     // thunk at the address from an earlier relocation.
299     TLSCALL,
300 
301     // Exception Handling helpers.
302     EH_RETURN,
303 
304     // SjLj exception handling setjmp.
305     EH_SJLJ_SETJMP,
306 
307     // SjLj exception handling longjmp.
308     EH_SJLJ_LONGJMP,
309 
310     // SjLj exception handling dispatch.
311     EH_SJLJ_SETUP_DISPATCH,
312 
313     /// Tail call return. See X86TargetLowering::LowerCall for
314     /// the list of operands.
315     TC_RETURN,
316 
317     // Vector move to low scalar and zero higher vector elements.
318     VZEXT_MOVL,
319 
320     // Vector integer truncate.
321     VTRUNC,
322     // Vector integer truncate with unsigned/signed saturation.
323     VTRUNCUS,
324     VTRUNCS,
325 
326     // Masked version of the above. Used when less than a 128-bit result is
327     // produced since the mask only applies to the lower elements and can't
328     // be represented by a select.
329     // SRC, PASSTHRU, MASK
330     VMTRUNC,
331     VMTRUNCUS,
332     VMTRUNCS,
333 
334     // Vector FP extend.
335     VFPEXT,
336     VFPEXT_SAE,
337     VFPEXTS,
338     VFPEXTS_SAE,
339 
340     // Vector FP round.
341     VFPROUND,
342     VFPROUND_RND,
343     VFPROUNDS,
344     VFPROUNDS_RND,
345 
346     // Masked version of above. Used for v2f64->v4f32.
347     // SRC, PASSTHRU, MASK
348     VMFPROUND,
349 
350     // 128-bit vector logical left / right shift
351     VSHLDQ,
352     VSRLDQ,
353 
354     // Vector shift elements
355     VSHL,
356     VSRL,
357     VSRA,
358 
359     // Vector variable shift
360     VSHLV,
361     VSRLV,
362     VSRAV,
363 
364     // Vector shift elements by immediate
365     VSHLI,
366     VSRLI,
367     VSRAI,
368 
369     // Shifts of mask registers.
370     KSHIFTL,
371     KSHIFTR,
372 
373     // Bit rotate by immediate
374     VROTLI,
375     VROTRI,
376 
377     // Vector packed double/float comparison.
378     CMPP,
379 
380     // Vector integer comparisons.
381     PCMPEQ,
382     PCMPGT,
383 
384     // v8i16 Horizontal minimum and position.
385     PHMINPOS,
386 
387     MULTISHIFT,
388 
389     /// Vector comparison generating mask bits for fp and
390     /// integer signed and unsigned data types.
391     CMPM,
392     // Vector mask comparison generating mask bits for FP values.
393     CMPMM,
394     // Vector mask comparison with SAE for FP values.
395     CMPMM_SAE,
396 
397     // Arithmetic operations with FLAGS results.
398     ADD,
399     SUB,
400     ADC,
401     SBB,
402     SMUL,
403     UMUL,
404     OR,
405     XOR,
406     AND,
407 
408     // Bit field extract.
409     BEXTR,
410     BEXTRI,
411 
412     // Zero High Bits Starting with Specified Bit Position.
413     BZHI,
414 
415     // Parallel extract and deposit.
416     PDEP,
417     PEXT,
418 
419     // X86-specific multiply by immediate.
420     MUL_IMM,
421 
422     // Vector sign bit extraction.
423     MOVMSK,
424 
425     // Vector bitwise comparisons.
426     PTEST,
427 
428     // Vector packed fp sign bitwise comparisons.
429     TESTP,
430 
431     // OR/AND test for masks.
432     KORTEST,
433     KTEST,
434 
435     // ADD for masks.
436     KADD,
437 
438     // Several flavors of instructions with vector shuffle behaviors.
439     // Saturated signed/unsigned packing.
440     PACKSS,
441     PACKUS,
442     // Intra-lane alignr.
443     PALIGNR,
444     // AVX512 inter-lane alignr.
445     VALIGN,
446     PSHUFD,
447     PSHUFHW,
448     PSHUFLW,
449     SHUFP,
450     // VBMI2 Concat & Shift.
451     VSHLD,
452     VSHRD,
453     VSHLDV,
454     VSHRDV,
455     // Shuffle Packed Values at 128-bit granularity.
456     SHUF128,
457     MOVDDUP,
458     MOVSHDUP,
459     MOVSLDUP,
460     MOVLHPS,
461     MOVHLPS,
462     MOVSD,
463     MOVSS,
464     MOVSH,
465     UNPCKL,
466     UNPCKH,
467     VPERMILPV,
468     VPERMILPI,
469     VPERMI,
470     VPERM2X128,
471 
472     // Variable Permute (VPERM).
473     // Res = VPERMV MaskV, V0
474     VPERMV,
475 
476     // 3-op Variable Permute (VPERMT2).
477     // Res = VPERMV3 V0, MaskV, V1
478     VPERMV3,
479 
480     // Bitwise ternary logic.
481     VPTERNLOG,
482     // Fix Up Special Packed Float32/64 values.
483     VFIXUPIMM,
484     VFIXUPIMM_SAE,
485     VFIXUPIMMS,
486     VFIXUPIMMS_SAE,
487     // Range Restriction Calculation For Packed Pairs of Float32/64 values.
488     VRANGE,
489     VRANGE_SAE,
490     VRANGES,
491     VRANGES_SAE,
492     // Reduce - Perform Reduction Transformation on scalar/packed FP.
493     VREDUCE,
494     VREDUCE_SAE,
495     VREDUCES,
496     VREDUCES_SAE,
497     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
498     // Also used by the legacy (V)ROUND intrinsics where we mask out the
499     // scaling part of the immediate.
500     VRNDSCALE,
501     VRNDSCALE_SAE,
502     VRNDSCALES,
503     VRNDSCALES_SAE,
504     // Tests types of FP values, for packed types.
505     VFPCLASS,
506     // Tests types of FP values, for scalar types.
507     VFPCLASSS,
508 
509     // Broadcast (splat) scalar or element 0 of a vector. If the operand is
510     // a vector, this node may change the vector length as part of the splat.
511     VBROADCAST,
512     // Broadcast mask to vector.
513     VBROADCASTM,
514 
515     /// SSE4A Extraction and Insertion.
516     EXTRQI,
517     INSERTQI,
518 
519     // XOP arithmetic/logical shifts.
520     VPSHA,
521     VPSHL,
522     // XOP signed/unsigned integer comparisons.
523     VPCOM,
524     VPCOMU,
525     // XOP packed permute bytes.
526     VPPERM,
527     // XOP two source permutation.
528     VPERMIL2,
529 
530     // Vector multiply packed unsigned doubleword integers.
531     PMULUDQ,
532     // Vector multiply packed signed doubleword integers.
533     PMULDQ,
534     // Vector Multiply Packed Unsigned Integers with Round and Scale.
535     MULHRS,
536 
537     // Multiply and Add Packed Integers.
538     VPMADDUBSW,
539     VPMADDWD,
540 
541     // AVX512IFMA multiply and add.
542     // NOTE: These are different from the instructions and perform
543     // op0 x op1 + op2.
544     VPMADD52L,
545     VPMADD52H,
546 
547     // VNNI
548     VPDPBUSD,
549     VPDPBUSDS,
550     VPDPWSSD,
551     VPDPWSSDS,
552 
553     // FMA nodes.
554     // We use the target independent ISD::FMA for the non-inverted case.
555     FNMADD,
556     FMSUB,
557     FNMSUB,
558     FMADDSUB,
559     FMSUBADD,
560 
561     // FMA with rounding mode.
562     FMADD_RND,
563     FNMADD_RND,
564     FMSUB_RND,
565     FNMSUB_RND,
566     FMADDSUB_RND,
567     FMSUBADD_RND,
568 
569     // AVX512-FP16 complex addition and multiplication.
570     VFMADDC,
571     VFMADDC_RND,
572     VFCMADDC,
573     VFCMADDC_RND,
574 
575     VFMULC,
576     VFMULC_RND,
577     VFCMULC,
578     VFCMULC_RND,
579 
580     VFMADDCSH,
581     VFMADDCSH_RND,
582     VFCMADDCSH,
583     VFCMADDCSH_RND,
584 
585     VFMULCSH,
586     VFMULCSH_RND,
587     VFCMULCSH,
588     VFCMULCSH_RND,
589 
590     // Compress and expand.
591     COMPRESS,
592     EXPAND,
593 
594     // Bits shuffle
595     VPSHUFBITQMB,
596 
597     // Convert Unsigned/Signed Integer to Floating-Point Value with rounding mode.
598     SINT_TO_FP_RND,
599     UINT_TO_FP_RND,
600     SCALAR_SINT_TO_FP,
601     SCALAR_UINT_TO_FP,
602     SCALAR_SINT_TO_FP_RND,
603     SCALAR_UINT_TO_FP_RND,
604 
605     // Vector float/double to signed/unsigned integer.
606     CVTP2SI,
607     CVTP2UI,
608     CVTP2SI_RND,
609     CVTP2UI_RND,
610     // Scalar float/double to signed/unsigned integer.
611     CVTS2SI,
612     CVTS2UI,
613     CVTS2SI_RND,
614     CVTS2UI_RND,
615 
616     // Vector float/double to signed/unsigned integer with truncation.
617     CVTTP2SI,
618     CVTTP2UI,
619     CVTTP2SI_SAE,
620     CVTTP2UI_SAE,
621     // Scalar float/double to signed/unsigned integer with truncation.
622     CVTTS2SI,
623     CVTTS2UI,
624     CVTTS2SI_SAE,
625     CVTTS2UI_SAE,
626 
627     // Vector signed/unsigned integer to float/double.
628     CVTSI2P,
629     CVTUI2P,
630 
631     // Masked versions of above. Used for v2f64->v4f32.
632     // SRC, PASSTHRU, MASK
633     MCVTP2SI,
634     MCVTP2UI,
635     MCVTTP2SI,
636     MCVTTP2UI,
637     MCVTSI2P,
638     MCVTUI2P,
639 
640     // Vector float to bfloat16.
641     // Convert TWO packed single data to one packed BF16 data
642     CVTNE2PS2BF16,
643     // Convert packed single data to packed BF16 data
644     CVTNEPS2BF16,
645     // Masked version of above.
646     // SRC, PASSTHRU, MASK
647     MCVTNEPS2BF16,
648 
649     // Dot product of BF16 pairs, accumulated into
650     // packed single precision.
651     DPBF16PS,
652 
653     // Save xmm argument registers to the stack, according to %al. An operator
654     // is needed so that this can be expanded with control flow.
655     VASTART_SAVE_XMM_REGS,
656 
657     // A stack checking function call. On Windows it's a _chkstk call.
658     DYN_ALLOCA,
659 
660     // For allocating variable amounts of stack space when using
661     // segmented stacks. Checks if the current stacklet has enough space, and
662     // falls back to heap allocation if not.
663     SEG_ALLOCA,
664 
665     // For allocating stack space when using stack clash protector.
666     // Allocation is performed by block, and each block is probed.
667     PROBED_ALLOCA,
668 
669     // Memory barriers.
670     MEMBARRIER,
671     MFENCE,
672 
673     // Get a random integer and indicate whether it is valid in CF.
674     RDRAND,
675 
676     // Get a NIST SP800-90B & C compliant random integer and
677     // indicate whether it is valid in CF.
678     RDSEED,
679 
680     // Protection keys
681     // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
682     // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
683     // value for ECX.
684     RDPKRU,
685     WRPKRU,
686 
687     // SSE42 string comparisons.
688     // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
689     // will emit one or two instructions based on which results are used. If
690     // both flags and index/mask are used, this allows us to use a single
691     // instruction since we won't have to pick an opcode for flags. Instead we
692     // can rely on the DAG to CSE everything and decide at isel.
693     PCMPISTR,
694     PCMPESTR,
695 
696     // Test if in transactional execution.
697     XTEST,
698 
699     // ERI instructions.
700     RSQRT28,
701     RSQRT28_SAE,
702     RSQRT28S,
703     RSQRT28S_SAE,
704     RCP28,
705     RCP28_SAE,
706     RCP28S,
707     RCP28S_SAE,
708     EXP2,
709     EXP2_SAE,
710 
711     // Conversions between float and half-float.
712     CVTPS2PH,
713     CVTPH2PS,
714     CVTPH2PS_SAE,
715 
716     // Masked version of above.
717     // SRC, RND, PASSTHRU, MASK
718     MCVTPS2PH,
719 
720     // Galois Field Arithmetic Instructions
721     GF2P8AFFINEINVQB,
722     GF2P8AFFINEQB,
723     GF2P8MULB,
724 
725     // LWP insert record.
726     LWPINS,
727 
728     // User level wait
729     UMWAIT,
730     TPAUSE,
731 
732     // Enqueue Stores Instructions
733     ENQCMD,
734     ENQCMDS,
735 
736     // For avx512-vp2intersect
737     VP2INTERSECT,
738 
739     // User level interrupts - testui
740     TESTUI,
741 
742     /// X86 strict FP compare instructions.
743     STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
744     STRICT_FCMPS,
745 
746     // Vector packed double/float comparison.
747     STRICT_CMPP,
748 
749     /// Vector comparison generating mask bits for fp and
750     /// integer signed and unsigned data types.
751     STRICT_CMPM,
752 
753     // Vector float/double to signed/unsigned integer with truncation.
754     STRICT_CVTTP2SI,
755     STRICT_CVTTP2UI,
756 
757     // Vector FP extend.
758     STRICT_VFPEXT,
759 
760     // Vector FP round.
761     STRICT_VFPROUND,
762 
763     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
764     // Also used by the legacy (V)ROUND intrinsics where we mask out the
765     // scaling part of the immediate.
766     STRICT_VRNDSCALE,
767 
768     // Vector signed/unsigned integer to float/double.
769     STRICT_CVTSI2P,
770     STRICT_CVTUI2P,
771 
772     // Strict FMA nodes.
773     STRICT_FNMADD,
774     STRICT_FMSUB,
775     STRICT_FNMSUB,
776 
777     // Conversions between float and half-float.
778     STRICT_CVTPS2PH,
779     STRICT_CVTPH2PS,
780 
781     // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
782     // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
783 
784     // Compare and swap.
785     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
786     LCMPXCHG8_DAG,
787     LCMPXCHG16_DAG,
788     LCMPXCHG16_SAVE_RBX_DAG,
789 
790     /// LOCK-prefixed arithmetic read-modify-write instructions.
791     /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
792     LADD,
793     LSUB,
794     LOR,
795     LXOR,
796     LAND,
797 
798     // Load, scalar_to_vector, and zero extend.
799     VZEXT_LOAD,
800 
801     // extract_vector_elt, store.
802     VEXTRACT_STORE,
803 
804     // scalar broadcast from memory.
805     VBROADCAST_LOAD,
806 
807     // subvector broadcast from memory.
808     SUBV_BROADCAST_LOAD,
809 
810     // Store FP control word into i16 memory.
811     FNSTCW16m,
812 
813     // Load FP control word from i16 memory.
814     FLDCW16m,
815 
816     /// This instruction implements FP_TO_SINT with the
817     /// integer destination in memory and a FP reg source.  This corresponds
818     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
819     /// has two inputs (token chain and address) and two outputs (int value
820     /// and token chain). Memory VT specifies the type to store to.
821     FP_TO_INT_IN_MEM,
822 
823     /// This instruction implements SINT_TO_FP with the
824     /// integer source in memory and FP reg result.  This corresponds to the
825     /// X86::FILD*m instructions. It has two inputs (token chain and address)
826     /// and two outputs (FP value and token chain). The integer source type is
827     /// specified by the memory VT.
828     FILD,
829 
830     /// This instruction implements a fp->int store from FP stack
831     /// slots. This corresponds to the fist instruction. It takes a
832     /// chain operand, value to store, address, and glue. The memory VT
833     /// specifies the type to store as.
834     FIST,
835 
836     /// This instruction implements an extending load to FP stack slots.
837     /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
838     /// operand, and ptr to load from. The memory VT specifies the type to
839     /// load from.
840     FLD,
841 
842     /// This instruction implements a truncating store from FP stack
843     /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
844     /// chain operand, value to store, address, and glue. The memory VT
845     /// specifies the type to store as.
846     FST,
847 
848     /// These instructions grab the address of the next argument
849     /// from a va_list. (reads and modifies the va_list in memory)
850     VAARG_64,
851     VAARG_X32,
852 
853     // Vector truncating store with unsigned/signed saturation
854     VTRUNCSTOREUS,
855     VTRUNCSTORES,
856     // Vector truncating masked store with unsigned/signed saturation
857     VMTRUNCSTOREUS,
858     VMTRUNCSTORES,
859 
860     // X86 specific gather and scatter
861     MGATHER,
862     MSCATTER,
863 
864     // Key locker nodes that produce flags.
865     AESENC128KL,
866     AESDEC128KL,
867     AESENC256KL,
868     AESDEC256KL,
869     AESENCWIDE128KL,
870     AESDECWIDE128KL,
871     AESENCWIDE256KL,
872     AESDECWIDE256KL,
873 
874     // WARNING: Do not add anything at the end unless you want the node to
875     // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
876     // opcodes will be treated as target memory ops!
877   };
878   } // end namespace X86ISD
879 
880   namespace X86 {
881     /// The current rounding mode is represented in bits 11:10 of FPSR. These
882     /// values are the same as the corresponding rounding-mode constants
883     /// used in glibc.
884     enum RoundingMode {
885       rmToNearest   = 0,        // FE_TONEAREST
886       rmDownward    = 1 << 10,  // FE_DOWNWARD
887       rmUpward      = 2 << 10,  // FE_UPWARD
888       rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
889       rmMask        = 3 << 10   // Bit mask selecting rounding mode
890     };
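    // Illustrative sketch (hypothetical variable, not part of the interface):
    // isolating the rounding-mode bits from such a 16-bit value and comparing
    // them against the constants above.
    //
    //   uint16_t FPWord = ...;
    //   bool ToNearest = (FPWord & X86::rmMask) == X86::rmToNearest;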
891   }
892 
893   /// Define some predicates that are used for node matching.
894   namespace X86 {
895     /// Returns true if Elt is a constant zero or floating point constant +0.0.
896     bool isZeroNode(SDValue Elt);
897 
898     /// Returns true if the given offset can
899     /// fit into the displacement field of the instruction.
900     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
901                                       bool hasSymbolicDisplacement);
902 
903     /// Determines whether the callee is required to pop its
904     /// own arguments. Callee pop is necessary to support tail calls.
905     bool isCalleePop(CallingConv::ID CallingConv,
906                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
907 
908     /// If Op is a constant whose elements are all the same constant or
909     /// undefined, return true and return the constant value in \p SplatVal.
910     /// If we have undef bits that don't cover an entire element, we treat these
911     /// as zero if AllowPartialUndefs is set, else we fail and return false.
912     bool isConstantSplat(SDValue Op, APInt &SplatVal,
913                          bool AllowPartialUndefs = true);
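    // Hedged usage sketch (hypothetical call site, e.g. inside a DAG
    // combine):
    //
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask())
    //     ...;  // Op splats the sign-bit mask into every element.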
914   } // end namespace X86
915 
916   //===--------------------------------------------------------------------===//
917   //  X86 Implementation of the TargetLowering interface
918   class X86TargetLowering final : public TargetLowering {
919   public:
920     explicit X86TargetLowering(const X86TargetMachine &TM,
921                                const X86Subtarget &STI);
922 
923     unsigned getJumpTableEncoding() const override;
924     bool useSoftFloat() const override;
925 
926     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
927                                ArgListTy &Args) const override;
928 
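    /// Shift amounts are i8 on x86: variable shifts and rotates take their
    /// count in the CL register, so an 8-bit amount type always suffices.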
929     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
930       return MVT::i8;
931     }
932 
933     const MCExpr *
934     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
935                               const MachineBasicBlock *MBB, unsigned uid,
936                               MCContext &Ctx) const override;
937 
938     /// Returns relocation base for the given PIC jumptable.
939     SDValue getPICJumpTableRelocBase(SDValue Table,
940                                      SelectionDAG &DAG) const override;
941     const MCExpr *
942     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
943                                  unsigned JTI, MCContext &Ctx) const override;
944 
945     /// Return the desired alignment for ByVal aggregate
946     /// function arguments in the caller parameter area. For X86, aggregates
947     /// that contain SSE vectors are placed at 16-byte boundaries while the
948     /// rest are at 4-byte boundaries.
949     uint64_t getByValTypeAlignment(Type *Ty,
950                                    const DataLayout &DL) const override;
951 
952     EVT getOptimalMemOpType(const MemOp &Op,
953                             const AttributeList &FuncAttributes) const override;
954 
955     /// Returns true if it's safe to use load / store of the
956     /// specified type to expand memcpy / memset inline. This is mostly true
957     /// for all types except for some special cases. For example, on X86
958     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
959     /// also does type conversion. Note the specified type doesn't have to be
960     /// legal as the hook is used before type legalization.
961     bool isSafeMemOpType(MVT VT) const override;
962 
963     /// Returns true if the target allows unaligned memory accesses of the
964     /// specified type. Returns whether it is "fast" in the last argument.
965     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
966                                         MachineMemOperand::Flags Flags,
967                                         bool *Fast) const override;
968 
969     /// Provide custom lowering hooks for some operations.
970     ///
971     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
972 
973     /// Replace the results of node with an illegal result
974     /// type with new values built out of custom code.
975     ///
976     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
977                             SelectionDAG &DAG) const override;
978 
979     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
980 
981     /// Return true if the target has native support for
982     /// the specified value type and it is 'desirable' to use the type for the
983     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
984     /// instruction encodings are longer and some i16 instructions are slow.
985     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
986 
987     /// Return true if the target has native support for the
988     /// specified value type and it is 'desirable' to use the type. e.g. On x86
989     /// i16 is legal, but undesirable since i16 instruction encodings are longer
990     /// and some i16 instructions are slow.
991     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
992 
993     /// Return the newly negated expression if the cost is not expensive and
994     /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
995     /// do the negation.
996     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
997                                  bool LegalOperations, bool ForCodeSize,
998                                  NegatibleCost &Cost,
999                                  unsigned Depth) const override;
1000 
1001     MachineBasicBlock *
1002     EmitInstrWithCustomInserter(MachineInstr &MI,
1003                                 MachineBasicBlock *MBB) const override;
1004 
1005     /// This method returns the name of a target specific DAG node.
1006     const char *getTargetNodeName(unsigned Opcode) const override;
1007 
1008     /// Do not merge vector stores after legalization because that may conflict
1009     /// with x86-specific store splitting optimizations.
1010     bool mergeStoresAfterLegalization(EVT MemVT) const override {
1011       return !MemVT.isVector();
1012     }
1013 
1014     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1015                           const MachineFunction &MF) const override;
1016 
1017     bool isCheapToSpeculateCttz() const override;
1018 
1019     bool isCheapToSpeculateCtlz() const override;
1020 
1021     bool isCtlzFast() const override;
1022 
1023     bool hasBitPreservingFPLogic(EVT VT) const override {
1024       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
1025              (VT == MVT::f16 && X86ScalarSSEf16);
1026     }
1027 
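    /// For example (illustrative): when the two halves of a pair such as
    ///
    ///   struct { float F; int I; };
    ///
    /// are stored together, merging them into a single i64 store requires
    /// bitcasting F and combining the halves with shift/or, while two scalar
    /// stores keep F in the FP domain; hence the mixed case below returns
    /// true.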
1028     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1029       // If the pair to store is a mixture of float and int values, we will
1030       // save two bitwise instructions and one float-to-int instruction and
1031       // add one store instruction. There is potentially a more
1032       // significant benefit because it avoids the float->int domain switch
1033       // for the input value. So it is more likely a win.
1034       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1035           (LTy.isInteger() && HTy.isFloatingPoint()))
1036         return true;
1037       // If the pair only contains int values, we will save two bitwise
1038       // instructions and add one store instruction (costing one more
1039       // store buffer). Since the benefit is less clear, we leave
1040       // such pairs out until we have a test case proving it is a win.
1041       return false;
1042     }
1043 
1044     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1045 
1046     bool hasAndNotCompare(SDValue Y) const override;
1047 
1048     bool hasAndNot(SDValue Y) const override;
1049 
1050     bool hasBitTest(SDValue X, SDValue Y) const override;
1051 
1052     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1053         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1054         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1055         SelectionDAG &DAG) const override;
1056 
1057     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1058                                            CombineLevel Level) const override;
1059 
1060     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1061 
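    /// For example (illustrative): checking whether an i64 value fits in i16
    /// gives XVT = i64 and KeptBits = 16, so KeptBitsVT = i16 and this hook
    /// returns true; KeptBits = 13 would produce no matching simple integer
    /// type and return false.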
1062     bool
1063     shouldTransformSignedTruncationCheck(EVT XVT,
1064                                          unsigned KeptBits) const override {
1065       // For vectors, we don't have a preference.
1066       if (XVT.isVector())
1067         return false;
1068 
1069       auto VTIsOk = [](EVT VT) -> bool {
1070         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1071                VT == MVT::i64;
1072       };
1073 
1074       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1075       // XVT will be larger than KeptBitsVT.
1076       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1077       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1078     }
1079 
1080     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
1081 
1082     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1083 
1084     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1085       return VT.isScalarInteger();
1086     }
1087 
1088     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1089     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1090 
1091     /// Return the value type to use for ISD::SETCC.
1092     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1093                            EVT VT) const override;
1094 
1095     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1096                                       const APInt &DemandedElts,
1097                                       TargetLoweringOpt &TLO) const override;
1098 
1099     /// Determine which of the bits specified in Mask are known to be either
1100     /// zero or one and return them in the KnownZero/KnownOne bitsets.
1101     void computeKnownBitsForTargetNode(const SDValue Op,
1102                                        KnownBits &Known,
1103                                        const APInt &DemandedElts,
1104                                        const SelectionDAG &DAG,
1105                                        unsigned Depth = 0) const override;
1106 
1107     /// Determine the number of bits in the operation that are sign bits.
1108     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1109                                              const APInt &DemandedElts,
1110                                              const SelectionDAG &DAG,
1111                                              unsigned Depth) const override;
1112 
1113     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1114                                                  const APInt &DemandedElts,
1115                                                  APInt &KnownUndef,
1116                                                  APInt &KnownZero,
1117                                                  TargetLoweringOpt &TLO,
1118                                                  unsigned Depth) const override;
1119 
1120     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1121                                                     const APInt &DemandedElts,
1122                                                     unsigned MaskIndex,
1123                                                     TargetLoweringOpt &TLO,
1124                                                     unsigned Depth) const;
1125 
1126     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1127                                            const APInt &DemandedBits,
1128                                            const APInt &DemandedElts,
1129                                            KnownBits &Known,
1130                                            TargetLoweringOpt &TLO,
1131                                            unsigned Depth) const override;
1132 
1133     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1134         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1135         SelectionDAG &DAG, unsigned Depth) const override;
1136 
1137     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1138 
1139     SDValue unwrapAddress(SDValue N) const override;
1140 
1141     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1142 
1143     bool ExpandInlineAsm(CallInst *CI) const override;
1144 
1145     ConstraintType getConstraintType(StringRef Constraint) const override;
1146 
1147     /// Examine constraint string and operand type and determine a weight value.
1148     /// The operand object must already have been set up with the operand type.
1149     ConstraintWeight
1150       getSingleConstraintMatchWeight(AsmOperandInfo &info,
1151                                      const char *constraint) const override;
1152 
1153     const char *LowerXConstraint(EVT ConstraintVT) const override;
1154 
1155     /// Lower the specified operand into the Ops vector. If it is invalid, don't
1156     /// add anything to Ops. If hasMemory is true it means one of the asm
1157     /// constraint of the inline asm instruction being processed is 'm'.
1158     void LowerAsmOperandForConstraint(SDValue Op,
1159                                       std::string &Constraint,
1160                                       std::vector<SDValue> &Ops,
1161                                       SelectionDAG &DAG) const override;
1162 
1163     unsigned
1164     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1165       if (ConstraintCode == "v")
1166         return InlineAsm::Constraint_v;
1167       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1168     }
1169 
1170     /// Handle Lowering flag assembly outputs.
1171     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1172                                         const SDLoc &DL,
1173                                         const AsmOperandInfo &Constraint,
1174                                         SelectionDAG &DAG) const override;
1175 
1176     /// Given a physical register constraint
1177     /// (e.g. {edx}), return the register number and the register class for the
1178     /// register.  This should only be used for C_Register constraints.  On
1179     /// error, this returns a register number of 0.
1180     std::pair<unsigned, const TargetRegisterClass *>
1181     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1182                                  StringRef Constraint, MVT VT) const override;
1183 
1184     /// Return true if the addressing mode represented
1185     /// by AM is legal for this target, for a load/store of the specified type.
1186     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1187                                Type *Ty, unsigned AS,
1188                                Instruction *I = nullptr) const override;
1189 
1190     /// Return true if the specified immediate is legal
1191     /// icmp immediate, that is the target has icmp instructions which can
1192     /// compare a register against the immediate without having to materialize
1193     /// the immediate into a register.
1194     bool isLegalICmpImmediate(int64_t Imm) const override;
1195 
1196     /// Return true if the specified immediate is legal
1197     /// add immediate, that is the target has add instructions which can
1198     /// add a register and the immediate without having to materialize
1199     /// the immediate into a register.
1200     bool isLegalAddImmediate(int64_t Imm) const override;
1201 
1202     bool isLegalStoreImmediate(int64_t Imm) const override;
1203 
1204     /// Return the cost of the scaling factor used in the addressing
1205     /// mode represented by AM for this target, for a load/store
1206     /// of the specified type.
1207     /// If the AM is supported, the return value must be >= 0.
1208     /// If the AM is not supported, it returns a negative value.
1209     InstructionCost getScalingFactorCost(const DataLayout &DL,
1210                                          const AddrMode &AM, Type *Ty,
1211                                          unsigned AS) const override;
1212 
1213     /// This is used to enable splatted operand transforms for vector shifts
1214     /// and vector funnel shifts.
1215     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1216 
1217     /// Add x86-specific opcodes to the default list.
1218     bool isBinOp(unsigned Opcode) const override;
1219 
1220     /// Returns true if the opcode is a commutative binary operation.
1221     bool isCommutativeBinOp(unsigned Opcode) const override;
1222 
1223     /// Return true if it's free to truncate a value of
1224     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1225     /// register EAX to i16 by referencing its sub-register AX.
1226     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1227     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1228 
1229     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1230 
1231     /// Return true if any actual instruction that defines a
1232     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1233     /// register. This does not necessarily include registers defined in
1234     /// unknown ways, such as incoming arguments, or copies from unknown
1235     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1236     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1237     /// all instructions that define 32-bit values implicit zero-extend the
1238     /// result out to 64 bits.
1239     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1240     bool isZExtFree(EVT VT1, EVT VT2) const override;
1241     bool isZExtFree(SDValue Val, EVT VT2) const override;
1242 
1243     bool shouldSinkOperands(Instruction *I,
1244                             SmallVectorImpl<Use *> &Ops) const override;
1245     bool shouldConvertPhiType(Type *From, Type *To) const override;
1246 
1247     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1248     /// extend node) is profitable.
1249     bool isVectorLoadExtDesirable(SDValue) const override;
1250 
1251     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1252     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1253     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1254     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1255                                     EVT VT) const override;
1256 
1257     /// Return true if it's profitable to narrow
1258     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1259     /// from i32 to i8 but not from i32 to i16.
1260     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1261 
1262     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1263     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1264     /// true and stores the intrinsic information into the IntrinsicInfo that was
1265     /// passed to the function.
1266     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1267                             MachineFunction &MF,
1268                             unsigned Intrinsic) const override;
1269 
1270     /// Returns true if the target can instruction select the
1271     /// specified FP immediate natively. If false, the legalizer will
1272     /// materialize the FP immediate as a load from a constant pool.
1273     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1274                       bool ForCodeSize) const override;
1275 
1276     /// Targets can use this to indicate that they only support *some*
1277     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1278     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1279     /// be legal.
1280     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1281 
1282     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1283     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1284     /// constant pool entry.
1285     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1286 
1287     /// Returns true if lowering to a jump table is allowed.
1288     bool areJTsAllowed(const Function *Fn) const override;
1289 
1290     /// If true, then instruction selection should
1291     /// seek to shrink the FP constant of the specified type to a smaller type
1292     /// in order to save space and / or reduce runtime.
1293     bool ShouldShrinkFPConstant(EVT VT) const override {
1294       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1295       // expensive than a straight movsd. On the other hand, it's important to
1296       // shrink long double fp constant since fldt is very slow.
1297       return !X86ScalarSSEf64 || VT == MVT::f80;
1298     }
1299 
1300     /// Return true if we believe it is correct and profitable to reduce the
1301     /// load node to a smaller type.
1302     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1303                                EVT NewVT) const override;
1304 
1305     /// Return true if the specified scalar FP type is computed in an SSE
1306     /// register, not on the X87 floating point stack.
1307     bool isScalarFPTypeInSSEReg(EVT VT) const {
1308       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1309              (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
1310              (VT == MVT::f16 && X86ScalarSSEf16);   // f16 is when AVX512FP16
1311     }
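    // For example, with SSE2 available but not AVX512-FP16, f32 and f64 are
    // kept in SSE registers while f16 is not (it is soft-promoted, see
    // softPromoteHalfType() below); f80 always stays on the x87 stack.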
1312 
1313     /// Returns true if it is beneficial to convert a load of a constant
1314     /// to just the constant itself.
1315     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1316                                            Type *Ty) const override;
1317 
1318     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1319 
1320     bool convertSelectOfConstantsToMath(EVT VT) const override;
1321 
1322     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1323                                 SDValue C) const override;
1324 
1325     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1326     /// with this index.
1327     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1328                                  unsigned Index) const override;
1329 
1330     /// Scalar ops always have equal or better analysis/performance/power than
1331     /// the vector equivalent, so this always makes sense if the scalar op is
1332     /// supported.
1333     bool shouldScalarizeBinop(SDValue) const override;
1334 
1335     /// Extract of a scalar FP value from index 0 of a vector is free.
1336     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1337       EVT EltVT = VT.getScalarType();
1338       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1339     }
1340 
1341     /// Overflow nodes should get combined/lowered to optimal instructions
1342     /// (they should allow eliminating explicit compares by getting flags from
1343     /// math ops).
1344     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1345                               bool MathUsed) const override;
1346 
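    /// For example (illustrative): four adjacent scalar constant stores can be
    /// replaced by one constant-pool load of a 4-element vector plus a single
    /// vector store, so NumElem == 4 returns true below while NumElem == 2
    /// does not.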
1347     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1348                                       unsigned AddrSpace) const override {
1349       // If we can replace more than 2 scalar stores, there will be a reduction
1350       // in instructions even after we add a vector constant load.
1351       return NumElem > 2;
1352     }
1353 
1354     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1355                                  const SelectionDAG &DAG,
1356                                  const MachineMemOperand &MMO) const override;
1357 
1358     /// Intel processors have a unified instruction and data cache
1359     const char * getClearCacheBuiltinName() const override {
1360       return nullptr; // nothing to do, move along.
1361     }
1362 
1363     Register getRegisterByName(const char* RegName, LLT VT,
1364                                const MachineFunction &MF) const override;
1365 
1366     /// If a physical register, this returns the register that receives the
1367     /// exception address on entry to an EH pad.
1368     Register
1369     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1370 
1371     /// If a physical register, this returns the register that receives the
1372     /// exception typeid on entry to a landing pad.
1373     Register
1374     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1375 
1376     virtual bool needsFixedCatchObjects() const override;
1377 
1378     /// This method returns a target specific FastISel object,
1379     /// or null if the target does not support "fast" ISel.
1380     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1381                              const TargetLibraryInfo *libInfo) const override;
1382 
1383     /// If the target has a standard location for the stack protector cookie,
1384     /// returns the address of that location. Otherwise, returns nullptr.
1385     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1386 
1387     bool useLoadStackGuardNode() const override;
1388     bool useStackGuardXorFP() const override;
1389     void insertSSPDeclarations(Module &M) const override;
1390     Value *getSDagStackGuard(const Module &M) const override;
1391     Function *getSSPStackGuardCheck(const Module &M) const override;
1392     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1393                                 const SDLoc &DL) const override;
1394 
1395 
1396     /// Return true if the target stores SafeStack pointer at a fixed offset in
1397     /// some non-standard address space, and populates the address space and
1398     /// offset as appropriate.
1399     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1400 
1401     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1402                                           SDValue Chain, SDValue Pointer,
1403                                           MachinePointerInfo PtrInfo,
1404                                           Align Alignment,
1405                                           SelectionDAG &DAG) const;
1406 
1407     /// Customize the preferred legalization strategy for certain types.
1408     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1409 
1410     bool softPromoteHalfType() const override { return true; }
1411 
1412     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1413                                       EVT VT) const override;
1414 
1415     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1416                                            CallingConv::ID CC,
1417                                            EVT VT) const override;
1418 
1419     unsigned getVectorTypeBreakdownForCallingConv(
1420         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1421         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1422 
1423     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1424 
1425     bool supportSwiftError() const override;
1426 
1427     bool hasStackProbeSymbol(MachineFunction &MF) const override;
1428     bool hasInlineStackProbe(MachineFunction &MF) const override;
1429     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1430 
1431     unsigned getStackProbeSize(MachineFunction &MF) const;
1432 
1433     bool hasVectorBlend() const override { return true; }
1434 
1435     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1436 
1437     /// Lower interleaved load(s) into target specific
1438     /// instructions/intrinsics.
1439     bool lowerInterleavedLoad(LoadInst *LI,
1440                               ArrayRef<ShuffleVectorInst *> Shuffles,
1441                               ArrayRef<unsigned> Indices,
1442                               unsigned Factor) const override;
1443 
1444     /// Lower interleaved store(s) into target specific
1445     /// instructions/intrinsics.
1446     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1447                                unsigned Factor) const override;
1448 
1449     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1450                                    SDValue Addr, SelectionDAG &DAG)
1451                                    const override;
1452 
1453     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1454 
1455   protected:
1456     std::pair<const TargetRegisterClass *, uint8_t>
1457     findRepresentativeClass(const TargetRegisterInfo *TRI,
1458                             MVT VT) const override;
1459 
1460   private:
1461     /// Keep a reference to the X86Subtarget around so that we can
1462     /// make the right decision when generating code for different targets.
1463     const X86Subtarget &Subtarget;
1464 
1465     /// Select between SSE or x87 floating point ops.
1466     /// When SSE is available, use it for f32 operations.
1467     /// When SSE2 is available, use it for f64 operations.
1468     bool X86ScalarSSEf32;
1469     bool X86ScalarSSEf64;
1470     bool X86ScalarSSEf16;
1471 
1472     /// A list of legal FP immediates.
1473     std::vector<APFloat> LegalFPImmediates;
1474 
1475     /// Indicate that this x86 target can instruction
1476     /// select the specified FP immediate natively.
1477     void addLegalFPImmediate(const APFloat& Imm) {
1478       LegalFPImmediates.push_back(Imm);
1479     }
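    // Usage sketch: the X86TargetLowering constructor registers the FP
    // immediates the target can materialize directly, e.g.
    //   addLegalFPImmediate(APFloat(+0.0f)); // zero idiom via xorps/xorpd
    // and isFPImmLegal() checks candidate constants against LegalFPImmediates.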

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
        bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace(void) const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
    bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to emit a cascaded select: a pair of dependent CMOV
    /// pseudo instructions lowered together as one branch sequence.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    // Operand order: (chain, passthru/value, mask, base pointer, index, scale).
    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };
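  // Illustrative sketch: the classof hooks above let DAG combines use the
  // usual LLVM cast machinery on these nodes, e.g.
  //   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Mask  = Gather->getMask();   // operand 2
  //     SDValue Index = Gather->getIndex();  // operand 4
  //   }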

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);
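  // Illustrative example for a single 128-bit lane (v8i16): Lo yields
  // <0, 8, 1, 9, 2, 10, 3, 11> and Hi yields <4, 12, 5, 13, 6, 14, 7, 15>;
  // with Unary set both halves come from the first operand, e.g.
  // Lo yields <0, 0, 1, 1, 2, 2, 3, 3>.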

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H