1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
19 
20 namespace llvm {
  // Forward declarations only: this header refers to these classes by
  // reference, so their full definitions are not needed here.
  class X86Subtarget;
  class X86TargetMachine;
23 
  namespace X86ISD {
    // X86 Specific DAG Nodes
    //
    // The enumerators fall into three consecutive ranges, each anchored to a
    // generic ISD constant:
    //   * FIRST_NUMBER (= ISD::BUILTIN_OP_END) onwards: ordinary target nodes.
    //   * STRICT_FCMP (= ISD::FIRST_TARGET_STRICTFP_OPCODE) onwards: strict FP
    //     nodes.
    //   * LCMPXCHG_DAG (= ISD::FIRST_TARGET_MEMORY_OPCODE) onwards: nodes that
    //     are treated as target memory ops.
    // A new enumerator must be added inside the range it belongs to; see the
    // WARNING comments at the end of the strict-FP and memory sections.
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// X86::SHLDW and X86::SHRDW instructions which have different amt
    /// modulo rules to generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
    FSHL,
    FSHR,

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information.  In particular the
    /// operands of these nodes are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an ObjC call that gets emitted together with a special
    // marker instruction.
    CALL_RVMARKER,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with a sbb and the value is all
    // ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with output as an i1 mask,
    /// and a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a flag operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_FLAG,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of a MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of a MMX vector
    /// and zero out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,

    /// Combined add and sub on an FP vector.
    ADDSUB,

    // FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage.  When calling to an OS provided
    // thunk at the address from an earlier relocation.
    TLSCALL,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    // NOTE(review): undocumented in this header; presumably the VBMI
    // multishift operation (VPMULTISHIFTQB) -- confirm against its users.
    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    MOVSH,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests types of FP values for packed types.
    VFPCLASS,
    // Tests types of FP values for scalar types.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Unsigned Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These are different than the instruction and perform
    // op0 x op1 + op2.
    VPMADD52L,
    VPMADD52H,

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // AVX512-FP16 complex addition and multiplication.
    VFMADDC,
    VFMADDC_RND,
    VFCMADDC,
    VFCMADDC_RND,

    VFMULC,
    VFMULC_RND,
    VFCMULC,
    VFCMULC_RND,

    VFMADDCSH,
    VFMADDCSH_RND,
    VFCMADDCSH,
    VFCMADDCSH_RND,

    VFMULCSH,
    VFMULCSH_RND,
    VFCMULCSH,
    VFCMULCSH_RND,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Signed/Unsigned Integer to Floating-Point Value with rounding
    // mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,
    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Vector float to bfloat16.
    // Convert TWO packed single data to one packed BF16 data
    CVTNE2PS2BF16,
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16 pairs, accumulated into
    // packed single precision.
    DPBF16PS,

    // A stack checking function call. On Windows it's the _chkstk call.
    DYN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Check if the current stacklet has enough space, and
    // falls back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MEMBARRIER,
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // both flags and index/mask are used, this allows us to use a single
    // instruction since we won't have to pick an opcode for flags. Instead we
    // can rely on the DAG to CSE everything and decide at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // ERI instructions.
    RSQRT28,
    RSQRT28_SAE,
    RSQRT28S,
    RSQRT28S_SAE,
    RCP28,
    RCP28_SAE,
    RCP28S,
    RCP28S_SAE,
    EXP2,
    EXP2_SAE,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    /// X86 strict FP compare instructions.
    /// This is the first enumerator of the strict-FP range: numbering restarts
    /// at ISD::FIRST_TARGET_STRICTFP_OPCODE here.
    STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
    // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

    // Compare and swap.
    // This is the first enumerator of the memory-op range: numbering restarts
    // at ISD::FIRST_TARGET_MEMORY_OPCODE here.
    LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,
    LBTS,
    LBTC,
    LBTR,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and a FP reg source.  This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result.  This corresponds to the
    /// X86::FILD*m instructions. It has two inputs (token chain and address)
    /// and two outputs (FP value and token chain). The integer source type is
    /// specified by the memory VT.
    FILD,

    /// This instruction implements a fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, and ptr to load from. The memory VT specifies the type to
    /// load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // WARNING: Do not add anything at the end unless you want the node to
    // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
    // opcodes will be thought as target memory ops!
  };
  } // end namespace X86ISD
879 
880   namespace X86 {
881     /// Current rounding mode is represented in bits 11:10 of FPSR. These
882     /// values are same as corresponding constants for rounding mode used
883     /// in glibc.
884     enum RoundingMode {
885       rmToNearest   = 0,        // FE_TONEAREST
886       rmDownward    = 1 << 10,  // FE_DOWNWARD
887       rmUpward      = 2 << 10,  // FE_UPWARD
888       rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
889       rmMask        = 3 << 10   // Bit mask selecting rounding mode
890     };
891   }
892 
  /// Define some predicates that are used for node matching.
  /// Everything in this namespace is a declaration only; no function here has
  /// an inline body.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset fits into the displacement field of
    /// the instruction for code model \p M.
    /// NOTE(review): \p hasSymbolicDisplacement presumably says whether the
    /// displacement also carries a symbol -- confirm against the definition.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

    /// If Op is a constant whose elements are all the same constant or
    /// undefined, return true and return the constant value in \p SplatVal.
    /// If we have undef bits that don't cover an entire element, we treat these
    /// as zero if AllowPartialUndefs is set, else we fail and return false.
    /// AllowPartialUndefs defaults to true.
    bool isConstantSplat(SDValue Op, APInt &SplatVal,
                         bool AllowPartialUndefs = true);

    /// Check if Op is a load operation that could be folded into some other x86
    /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
    /// NOTE(review): \p AssumeSingleUse (default false) presumably skips a
    /// one-use check on the load -- confirm against the definition.
    bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                     bool AssumeSingleUse = false);

    /// Check if Op is a load operation that could be folded into a vector splat
    /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
    /// \p AssumeSingleUse defaults to false, as for mayFoldLoad.
    bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                         const X86Subtarget &Subtarget,
                                         bool AssumeSingleUse = false);

    /// Check if Op is a value that could be used to fold a store into some
    /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
    bool mayFoldIntoStore(SDValue Op);

    /// Check if Op is an operation that could be folded into a zero extend x86
    /// instruction.
    bool mayFoldIntoZeroExtend(SDValue Op);
  } // end namespace X86
934 
935   //===--------------------------------------------------------------------===//
936   //  X86 Implementation of the TargetLowering interface
937   class X86TargetLowering final : public TargetLowering {
938   public:
    /// Builds the lowering object from the target machine \p TM and the
    /// subtarget \p STI. Explicit, so no implicit conversion from a target
    /// machine is possible. Declared here only; there is no inline body.
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);
941 
    /// Override of the base-class jump table encoding hook (no inline body).
    /// NOTE(review): the unsigned result is presumably a MachineJumpTableInfo
    /// entry-kind value -- confirm against the base-class declaration.
    unsigned getJumpTableEncoding() const override;
    /// Override of the base-class soft-float query (no inline body).
    /// NOTE(review): presumably answered from the subtarget -- confirm in the
    /// definition.
    bool useSoftFloat() const override;
944 
    /// Override of the base-class libcall hook; takes the function \p MF, the
    /// calling convention \p CC and the libcall argument list \p Args.
    /// NOTE(review): presumably annotates entries of \p Args with
    /// target-specific attributes even though the list is passed by non-const
    /// reference to a const method -- confirm in the definition.
    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;
947 
948     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
949       return MVT::i8;
950     }
951 
    /// Override of the base-class hook that produces the MCExpr for one custom
    /// jump table entry (no inline body).
    /// NOTE(review): presumably only reached when getJumpTableEncoding()
    /// selects a custom encoding, with \p MBB the entry's target block and
    /// \p uid the jump table index -- confirm against the base class.
    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;
956 
    /// Returns the relocation base for the given PIC jump table \p Table, as
    /// an SDValue built in \p DAG.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    /// Override returning an MCExpr for jump table \p JTI of function \p MF.
    /// NOTE(review): by its name this is the MCExpr counterpart of
    /// getPICJumpTableRelocBase -- confirm in the definition.
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;
963 
    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, some
    /// aggregates are placed at 16-byte boundaries while the rest are at
    /// 4-byte boundaries.
    /// NOTE(review): the original sentence read "aggregates that contains are
    /// placed at 16-byte boundaries" and is missing its object; presumably
    /// "aggregates that contain SSE vectors" -- confirm in the definition.
    uint64_t getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;
970 
    /// Override of the base-class hook; takes the memory operation description
    /// \p Op and the function attributes, and returns a value type.
    /// NOTE(review): presumably the preferred type for expanding that memory
    /// operation inline -- confirm against the base-class documentation.
    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;
973 
    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2, f64 load / store are done with fldl / fstpl, which
    /// also do type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;
981 
    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Whether such an access is "fast" is reported through
    /// the last argument, \p Fast.
    /// NOTE(review): \p Fast is a pointer; whether a null pointer is accepted
    /// is not visible in this header -- confirm in the definition.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        bool *Fast) const override;
987 
    /// Provide custom lowering hooks for some operations: takes the operation
    /// \p Op and the DAG it lives in, and returns an SDValue.
    /// NOTE(review): which operations are handled, and what is returned for
    /// unhandled ones, is only visible in the definition.
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
991 
    /// Replace the results of a node that has an illegal result
    /// type with new values built out of custom code. The replacement values
    /// are handed back through the \p Results vector.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                            SelectionDAG &DAG) const override;
997 
    /// Override of the base-class DAG-combine hook for node \p N (no inline
    /// body).
    /// NOTE(review): which opcodes are combined, and the meaning of the
    /// returned SDValue, are not visible in this header -- see the base class.
    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
999 
    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type (\p Opc). e.g. On x86 i16 is legal, but undesirable
    /// since i16 instruction encodings are longer and some i16 instructions
    /// are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1005 
    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    /// NOTE(review): this text mirrors isTypeDesirableForOp and does not
    /// describe the parameters. \p PVT is a non-const reference, presumably an
    /// out-parameter receiving the type \p Op should be promoted to -- confirm
    /// in the definition.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1011 
    /// Return the newly negated expression if the cost is not expensive, and
    /// set \p Cost to indicate whether doing the negation is cheaper or
    /// neutral.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    /// Custom-inserter hook for instruction \p MI located in block \p MBB.
    /// NOTE(review): presumably returns the block in which emission should
    /// continue once \p MI has been expanded — confirm against the
    /// base-class hook.
    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;
1026 
1027     /// Do not merge vector stores after legalization because that may conflict
1028     /// with x86-specific store splitting optimizations.
1029     bool mergeStoresAfterLegalization(EVT MemVT) const override {
1030       return !MemVT.isVector();
1031     }
1032 
    /// NOTE(review): presumably reports whether stores in \p MF may be merged
    /// into a single store of type \p MemVT in address space
    /// \p AddressSpace — inferred from the name; confirm in the .cpp.
    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override;

    /// NOTE(review): presumably true when a count-trailing-zeros operation is
    /// cheap enough to execute speculatively — confirm in the .cpp.
    bool isCheapToSpeculateCttz() const override;

    /// NOTE(review): count-leading-zeros counterpart of
    /// isCheapToSpeculateCttz() — confirm in the .cpp.
    bool isCheapToSpeculateCtlz() const override;

    /// NOTE(review): presumably true when count-leading-zeros is fast on the
    /// current subtarget — confirm in the .cpp.
    bool isCtlzFast() const override;

    /// NOTE(review): presumably true when FP logic operations on \p VT
    /// preserve the exact bit pattern of their operands — confirm in the
    /// .cpp.
    bool hasBitPreservingFPLogic(EVT VT) const override;
1043 
1044     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1045       // If the pair to store is a mixture of float and int values, we will
1046       // save two bitwise instructions and one float-to-int instruction and
1047       // increase one store instruction. There is potentially a more
1048       // significant benefit because it avoids the float->int domain switch
1049       // for input value. So It is more likely a win.
1050       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1051           (LTy.isInteger() && HTy.isFloatingPoint()))
1052         return true;
1053       // If the pair only contains int values, we will save two bitwise
1054       // instructions and increase one store instruction (costing one more
1055       // store buffer). Since the benefit is more blurred so we leave
1056       // such pair out until we get testcase to prove it is a win.
1057       return false;
1058     }
1059 
    /// NOTE(review): presumably true when folding the mask instruction
    /// \p AndI into a compare against zero is profitable — inferred from the
    /// name; confirm in the .cpp.
    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    /// NOTE(review): presumably true when the target can combine an and-not
    /// of \p Y with a compare — confirm in the .cpp.
    bool hasAndNotCompare(SDValue Y) const override;

    /// NOTE(review): presumably true when the target has an and-not
    /// operation usable with \p Y — confirm in the .cpp.
    bool hasAndNot(SDValue Y) const override;

    /// NOTE(review): presumably true when the target has a bit-test
    /// operation usable for \p X and \p Y — confirm in the .cpp.
    bool hasBitTest(SDValue X, SDValue Y) const override;

    /// NOTE(review): judging by the name, decides whether a constant should
    /// be hoisted out of a shift on the LHS of an 'and', turning
    /// \p OldShiftOpcode into \p NewShiftOpcode — confirm in the .cpp.
    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    /// NOTE(review): presumably true when a pair of constant shifts in \p N
    /// should be folded into a mask at combine level \p Level — confirm.
    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    /// NOTE(review): presumably the opposite decision — whether a mask
    /// should be turned into a pair of variable shifts by \p Y — confirm.
    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1077 
1078     bool
1079     shouldTransformSignedTruncationCheck(EVT XVT,
1080                                          unsigned KeptBits) const override {
1081       // For vectors, we don't have a preference..
1082       if (XVT.isVector())
1083         return false;
1084 
1085       auto VTIsOk = [](EVT VT) -> bool {
1086         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1087                VT == MVT::i64;
1088       };
1089 
1090       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1091       // XVT will be larger than KeptBitsVT.
1092       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1093       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1094     }
1095 
    /// NOTE(review): presumably decides whether the shift node \p N should be
    /// expanded rather than lowered some other way — confirm in the .cpp.
    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

    /// NOTE(review): presumably decides whether an insert-element with a
    /// variable index into a vector of type \p VT should be handled by
    /// splatting — inferred from the name; confirm in the .cpp.
    bool shouldSplatInsEltVarIndex(EVT VT) const override;
1099 
1100     bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1101       // Converting to sat variants holds little benefit on X86 as we will just
1102       // need to saturate the value back using fp arithmatic.
1103       return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1104     }
1105 
1106     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1107       return VT.isScalarInteger();
1108     }
1109 
    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    /// NOTE(review): presumably returns the type to use for an equality
    /// compare of \p NumBits bits, or an invalid type when none is fast —
    /// confirm in the .cpp.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    /// NOTE(review): presumably lets the target shrink or adjust a constant
    /// operand of \p Op given the demanded bits/elements, recording any
    /// change in \p TLO — inferred from the name; confirm in the .cpp.
    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;
1120 
    /// Determine which bits of the target node \p Op are known to be either
    /// zero or one and return them in \p Known.
    /// NOTE(review): \p DemandedElts presumably selects the vector elements
    /// the caller cares about — confirm against the base-class hook.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;
1134 
    /// NOTE(review): presumably attempts to simplify the target node \p Op
    /// given that only \p DemandedElts are used, reporting undef/zero
    /// elements through \p KnownUndef / \p KnownZero and changes through
    /// \p TLO — confirm against the base-class hook.
    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    /// X86-specific helper (declared without 'override').
    /// NOTE(review): presumably the target-shuffle flavour of the
    /// demanded-elements simplification, with \p MaskIndex identifying the
    /// shuffle-mask operand — confirm in the .cpp.
    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    /// NOTE(review): presumably attempts to simplify the target node \p Op
    /// given that only \p DemandedBits of \p DemandedElts are used, filling
    /// in \p Known — confirm against the base-class hook.
    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    /// NOTE(review): multiple-use counterpart of the hook above; presumably
    /// returns a simpler value usable in place of \p Op, or an empty SDValue
    /// — confirm against the base-class hook.
    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    /// NOTE(review): presumably true when the demanded elements of the
    /// target node \p Op all hold the same value, with undef elements
    /// reported in \p UndefElts — confirm against the base-class hook.
    bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                   APInt &UndefElts,
                                   unsigned Depth) const override;
1162 
1163     bool isTargetCanonicalConstantNode(SDValue Op) const override {
1164       // Peek through bitcasts/extracts/inserts to see if we have a broadcast
1165       // vector from memory.
1166       while (Op.getOpcode() == ISD::BITCAST ||
1167              Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1168              (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1169               Op.getOperand(0).isUndef()))
1170         Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1171 
1172       return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1173              TargetLowering::isTargetCanonicalConstantNode(Op);
1174     }
1175 
    /// NOTE(review): presumably returns the constant that \p LD loads when
    /// it can be identified, or nullptr otherwise — confirm in the .cpp.
    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    /// NOTE(review): presumably strips a target-specific address wrapper
    /// node from \p N — inferred from the name; confirm in the .cpp.
    SDValue unwrapAddress(SDValue N) const override;

    /// X86-specific helper (declared without 'override').
    /// NOTE(review): presumably returns a frame-index node for the return
    /// address slot — confirm in the .cpp.
    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    /// NOTE(review): presumably tries to replace the inline asm call \p CI
    /// with equivalent IR and returns true on success — confirm against the
    /// base-class hook.
    bool ExpandInlineAsm(CallInst *CI) const override;

    /// NOTE(review): presumably classifies the inline asm constraint string
    /// \p Constraint — confirm against the base-class hook.
    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                     const char *constraint) const override;

    /// NOTE(review): presumably picks the concrete constraint string used to
    /// lower an 'X' constraint for an operand of type \p ConstraintVT —
    /// confirm against the base-class hook.
    const char *LowerXConstraint(EVT ConstraintVT) const override;
1193 
    /// Lower the specified operand \p Op into the \p Ops vector. If it is
    /// invalid, don't add anything to \p Ops. \p Constraint is the
    /// constraint string the operand is being lowered for. Note that this
    /// signature takes no separate memory-constraint flag.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;
1201 
1202     unsigned
1203     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1204       if (ConstraintCode == "v")
1205         return InlineAsm::Constraint_v;
1206       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1207     }
1208 
    /// Handle lowering of flag assembly outputs.
    /// NOTE(review): \p Chain and \p Flag are taken by non-const reference
    /// and are presumably updated by the lowering — confirm in the .cpp.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint (e.g. {edx}), return the
    /// register number and the register class for the register. This should
    /// only be used for C_Register constraints. On error, this returns a
    /// register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;
1222 
    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// NOTE(review): presumably the store counterpart of the two hooks
    /// above — true when \p Imm can be stored without first materializing
    /// it into a register; confirm in the .cpp.
    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    InstructionCost getScalingFactorCost(const DataLayout &DL,
                                         const AddrMode &AM, Type *Ty,
                                         unsigned AS) const override;
1251 
    /// This is used to enable splatted operand transforms for vector shifts
    /// and vector funnel shifts.
    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value
    /// in register EAX to i16 by referencing its sub-register AX.
    /// The second overload answers the same question for EVTs.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    /// NOTE(review): presumably true when a truncate from \p Ty1 to \p Ty2
    /// does not prevent a call from being emitted as a tail call — confirm
    /// against the base-class hook.
    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1269 
    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined
    /// in unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    /// Overloads are provided for IR types, EVTs and a specific SDValue.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// NOTE(review): presumably returns true when operands of \p I should be
    /// sunk next to it, collecting the affected uses in \p Ops — confirm
    /// against the base-class hook.
    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
    /// NOTE(review): presumably decides whether a phi of type \p From should
    /// be converted to type \p To — confirm against the base-class hook.
    bool shouldConvertPhiType(Type *From, Type *To) const override;
1285 
    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable. ExtVal is the single, unnamed SDValue
    /// parameter of this declaration.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// NOTE(review): presumably decides whether a select whose arm is the
    /// identity constant of \p BinOpcode should be folded into the binop
    /// for type \p VT — inferred from the name; confirm in the .cpp.
    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                              EVT VT) const override;
1303 
    /// Given an intrinsic, checks if on the target the intrinsic will need
    /// to map to a MemIntrinsicNode (touches memory). If this is the case,
    /// it returns true and stores the intrinsic information into the
    /// IntrinsicInfo that was passed to the function (\p Info).
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1328 
    /// Returns true if lowering to a jump table is allowed for function
    /// \p Fn.
    bool areJTsAllowed(const Function *Fn) const override;

    /// NOTE(review): presumably returns the type that a switch condition of
    /// type \p ConditionVT should be converted to — inferred from the name;
    /// confirm in the .cpp.
    MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                        EVT ConditionVT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override;

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;
1344 
    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    /// X86-specific query (declared without 'override').
    bool isScalarFPTypeInSSEReg(EVT VT) const;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    /// NOTE(review): presumably decides whether a select between FP constant
    /// loads should be reduced, based on the compare operand type
    /// \p CmpOpVT — confirm in the .cpp.
    bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

    /// NOTE(review): presumably decides whether a select of constants of
    /// type \p VT should be turned into arithmetic — confirm in the .cpp.
    bool convertSelectOfConstantsToMath(EVT VT) const override;

    /// NOTE(review): presumably decides whether a multiply of type \p VT by
    /// the constant \p C should be decomposed into other operations —
    /// confirm in the .cpp.
    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
    bool shouldScalarizeBinop(SDValue) const override;
1370 
1371     /// Extract of a scalar FP value from index 0 of a vector is free.
1372     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1373       EVT EltVT = VT.getScalarType();
1374       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1375     }
1376 
    /// Overflow nodes should get combined/lowered to optimal instructions
    /// (they should allow eliminating explicit compares by getting flags from
    /// math ops).
    /// NOTE(review): \p MathUsed presumably tells whether the arithmetic
    /// result (not just the overflow bit) has users — confirm against the
    /// base-class hook.
    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override;
1382 
1383     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1384                                       unsigned AddrSpace) const override {
1385       // If we can replace more than 2 scalar stores, there will be a reduction
1386       // in instructions even after we add a vector constant load.
1387       return NumElem > 2;
1388     }
1389 
    /// NOTE(review): presumably decides whether a load of type \p LoadVT
    /// followed by a bitcast to \p BitcastVT should be turned into a direct
    /// load of \p BitcastVT, given memory operand \p MMO — confirm in the
    /// .cpp.
    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                 const SelectionDAG &DAG,
                                 const MachineMemOperand &MMO) const override;

    /// Intel processors have a unified instruction and data cache, so no
    /// clear-cache builtin name is provided: this always returns nullptr.
    const char * getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    /// NOTE(review): presumably resolves the register named \p RegName for
    /// function \p MF — confirm against the base-class hook.
    Register getRegisterByName(const char* RegName, LLT VT,
                               const MachineFunction &MF) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1411 
1412     virtual bool needsFixedCatchObjects() const override;
1413 
    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilderBase &IRB) const override;

    // Stack-protector related hooks.
    // NOTE(review): the exact contract of each hook below is defined by the
    // base-class declarations — confirm there.
    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;
1430 
1431 
    /// Used when the target stores the SafeStack pointer at a fixed offset
    /// in some non-standard address space. The result is a Value pointer,
    /// not a bool.
    /// NOTE(review): presumably returns the location of that pointer, built
    /// with \p IRB, and falls back to a default otherwise — confirm in the
    /// .cpp.
    Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

    /// X86-specific helper (declared without 'override').
    /// NOTE(review): presumably builds an x87 FILD that loads an integer of
    /// type \p SrcVT from \p Pointer and produces \p DstVT, returning the
    /// result together with the output chain — confirm in the .cpp.
    std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                          SDValue Chain, SDValue Pointer,
                                          MachinePointerInfo PtrInfo,
                                          Align Alignment,
                                          SelectionDAG &DAG) const;
1442 
    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

    /// Always returns true.
    /// NOTE(review): presumably opts the target into "soft promotion" of
    /// half-precision values during type legalization — confirm against the
    /// base-class hook.
    bool softPromoteHalfType() const override { return true; }

    // Calling-convention specific register type queries.
    // NOTE(review): the exact contract of each hook is defined by the
    // base-class declarations — confirm there.
    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    unsigned getVectorTypeBreakdownForCallingConv(
        LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
        unsigned &NumIntermediates, MVT &RegisterVT) const override;

    /// NOTE(review): presumably true when integer division of type \p VT is
    /// cheap given function attributes \p Attr — confirm in the .cpp.
    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    /// NOTE(review): presumably true when the target supports the
    /// swifterror attribute — confirm in the .cpp.
    bool supportSwiftError() const override;

    // Stack probing queries for function \p MF.
    bool hasStackProbeSymbol(MachineFunction &MF) const override;
    bool hasInlineStackProbe(MachineFunction &MF) const override;
    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    /// X86-specific query (declared without 'override').
    /// NOTE(review): presumably returns the stack probe size in bytes for
    /// \p MF — the unit is not visible here; confirm in the .cpp.
    unsigned getStackProbeSize(MachineFunction &MF) const;
1468 
    /// Always returns true.
    bool hasVectorBlend() const override { return true; }

    /// Always returns 4, the largest interleave factor reported as
    /// supported.
    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    /// NOTE(review): presumably expands an indirect jump-table branch to
    /// \p Addr — confirm against the base-class hook.
    SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
                                   SDValue Addr, SelectionDAG &DAG)
                                   const override;

    /// NOTE(review): presumably returns the preferred alignment for loop
    /// \p ML — confirm against the base-class hook.
    Align getPrefLoopAlignment(MachineLoop *ML) const override;
1490 
1491   protected:
    /// NOTE(review): presumably returns the representative register class
    /// for \p VT together with an associated cost value (the uint8_t) —
    /// confirm against the base-class hook.
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;
1495 
1496   private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    /// Being a reference member, it is bound once at construction.
    const X86Subtarget &Subtarget;

    /// A list of legal FP immediates, filled in through
    /// addLegalFPImmediate().
    std::vector<APFloat> LegalFPImmediates;
1503 
1504     /// Indicate that this x86 target can instruction
1505     /// select the specified FP immediate natively.
1506     void addLegalFPImmediate(const APFloat& Imm) {
1507       LegalFPImmediates.push_back(Imm);
1508     }
1509 
    /// NOTE(review): presumably lowers the values returned by a call
    /// (\p Ins) into \p InVals and returns the updated chain — confirm in
    /// the .cpp.
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    /// NOTE(review): presumably lowers incoming argument number \p i, which
    /// \p VA assigns to memory — confirm in the .cpp.
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    /// NOTE(review): presumably emits the store of outgoing call argument
    /// \p Arg to its stack location relative to \p StackPtr — confirm in the
    /// .cpp.
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;
1525 
    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
        bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
    /// NOTE(review): presumably loads the return address for a tail call
    /// into \p OutRetAddr and returns the updated chain — confirm in the
    /// .cpp.
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    /// NOTE(review): presumably rounds \p StackSize up to satisfy the stack
    /// alignment requirement — confirm in the .cpp.
    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    /// NOTE(review): which address space this returns is not visible from
    /// the declaration — confirm in the .cpp.
    unsigned getAddressSpace() const;

    /// NOTE(review): shared helper for FP-to-integer lowering; \p IsSigned
    /// selects signed vs unsigned and \p Chain is presumably updated —
    /// confirm in the .cpp.
    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    /// NOTE(review): shared helper for LRINT/LLRINT lowering — confirm in
    /// the .cpp.
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1548 
    // Custom lowering helpers for vector nodes. Each takes the node to lower
    // (\p Op) plus the DAG and returns an SDValue.
    // NOTE(review): presumably dispatched from LowerOperation — confirm in
    // the .cpp.
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    /// NOTE(review): presumably returns the wrapper node opcode to use for
    /// \p GV given operand flags \p OpFlags — confirm in the .cpp.
    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    // Custom lowering helpers for address-like nodes.
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses. \p ForCall distinguishes the two cases.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;
1566 
    // Per-operation custom lowering helpers. Each takes the node to lower
    // (\p Op) plus the DAG and returns an SDValue; the name identifies the
    // operation handled.
    // NOTE(review): presumably dispatched from LowerOperation — confirm in
    // the .cpp.
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    // Unlike its siblings, this helper also takes the chain by reference.
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1601 
    /// NOTE(review): presumably lowers the incoming formal arguments
    /// described by \p Ins into \p InVals and returns the updated chain —
    /// confirm against the base-class hook.
    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    /// NOTE(review): presumably lowers the call described by \p CLI, placing
    /// its results in \p InVals — confirm against the base-class hook.
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    /// NOTE(review): presumably lowers a function return of the values
    /// \p OutVals described by \p Outs — confirm against the base-class
    /// hook.
    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;
1614 
1615     bool supportSplitCSR(MachineFunction *MF) const override {
1616       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1617           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1618     }
1619     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1620     void insertCopiesSplitCSR(
1621       MachineBasicBlock *Entry,
1622       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1623 
1624     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1625 
1626     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1627 
1628     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1629                             ISD::NodeType ExtendKind) const override;
1630 
1631     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1632                         bool isVarArg,
1633                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1634                         LLVMContext &Context) const override;
1635 
1636     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1637 
1638     TargetLoweringBase::AtomicExpansionKind
1639     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1640     TargetLoweringBase::AtomicExpansionKind
1641     shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1642     TargetLoweringBase::AtomicExpansionKind
1643     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1644     TargetLoweringBase::AtomicExpansionKind
1645     shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1646     void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1647 
1648     LoadInst *
1649     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1650 
1651     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1652     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1653 
1654     bool needsCmpXchgNb(Type *MemType) const;
1655 
1656     template<typename T> bool isSoftFP16(T VT) const;
1657 
1658     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1659                                 MachineBasicBlock *DispatchBB, int FI) const;
1660 
1661     // Utility function to emit the low-level va_arg code for X86-64.
1662     MachineBasicBlock *
1663     EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1664 
1665     /// Utility function to emit the xmm reg save portion of va_start.
1666     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1667                                                  MachineInstr &MI2,
1668                                                  MachineBasicBlock *BB) const;
1669 
1670     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1671                                          MachineBasicBlock *BB) const;
1672 
1673     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1674                                            MachineBasicBlock *BB) const;
1675 
1676     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1677                                             MachineBasicBlock *BB) const;
1678 
1679     MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1680                                                MachineBasicBlock *BB) const;
1681 
1682     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1683                                           MachineBasicBlock *BB) const;
1684 
1685     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1686                                           MachineBasicBlock *BB) const;
1687 
1688     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1689                                                 MachineBasicBlock *BB) const;
1690 
1691     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1692                                         MachineBasicBlock *MBB) const;
1693 
1694     void emitSetJmpShadowStackFix(MachineInstr &MI,
1695                                   MachineBasicBlock *MBB) const;
1696 
1697     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1698                                          MachineBasicBlock *MBB) const;
1699 
1700     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1701                                                  MachineBasicBlock *MBB) const;
1702 
1703     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1704                                              MachineBasicBlock *MBB) const;
1705 
1706     /// Emit flags for the given setcc condition and operands. Also returns the
1707     /// corresponding X86 condition code constant in X86CC.
1708     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1709                               const SDLoc &dl, SelectionDAG &DAG,
1710                               SDValue &X86CC) const;
1711 
1712     /// Check if replacement of SQRT with RSQRT should be disabled.
1713     bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1714 
1715     /// Use rsqrt* to speed up sqrt calculations.
1716     SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1717                             int &RefinementSteps, bool &UseOneConstNR,
1718                             bool Reciprocal) const override;
1719 
1720     /// Use rcp* to speed up fdiv calculations.
1721     SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1722                              int &RefinementSteps) const override;
1723 
1724     /// Reassociate floating point divisions into multiply by reciprocal.
1725     unsigned combineRepeatedFPDivisors() const override;
1726 
1727     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1728                           SmallVectorImpl<SDNode *> &Created) const override;
1729   };
1730 
1731   namespace X86 {
1732     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1733                              const TargetLibraryInfo *libInfo);
1734   } // end namespace X86
1735 
1736   // X86 specific Gather/Scatter nodes.
1737   // The class has the same order of operands as MaskedGatherScatterSDNode for
1738   // convenience.
1739   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1740   public:
1741     // This is a intended as a utility and should never be directly created.
1742     X86MaskedGatherScatterSDNode() = delete;
1743     ~X86MaskedGatherScatterSDNode() = delete;
1744 
1745     const SDValue &getBasePtr() const { return getOperand(3); }
1746     const SDValue &getIndex()   const { return getOperand(4); }
1747     const SDValue &getMask()    const { return getOperand(2); }
1748     const SDValue &getScale()   const { return getOperand(5); }
1749 
1750     static bool classof(const SDNode *N) {
1751       return N->getOpcode() == X86ISD::MGATHER ||
1752              N->getOpcode() == X86ISD::MSCATTER;
1753     }
1754   };
1755 
1756   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1757   public:
1758     const SDValue &getPassThru() const { return getOperand(1); }
1759 
1760     static bool classof(const SDNode *N) {
1761       return N->getOpcode() == X86ISD::MGATHER;
1762     }
1763   };
1764 
1765   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1766   public:
1767     const SDValue &getValue() const { return getOperand(1); }
1768 
1769     static bool classof(const SDNode *N) {
1770       return N->getOpcode() == X86ISD::MSCATTER;
1771     }
1772   };
1773 
1774   /// Generate unpacklo/unpackhi shuffle mask.
1775   void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1776                                bool Unary);
1777 
1778   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1779   /// imposed by AVX and specific to the unary pattern. Example:
1780   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1781   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1782   void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1783 
1784 } // end namespace llvm
1785 
1786 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1787