//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, use the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
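// As an illustration only (the operand order shown is schematic and the
// FADD_MERGE_OP1 name is hypothetical, used here purely to contrast the
// conventions above):
//
//    FADD_PRED(Pg, Op1, Op2)                  inactive lanes are undefined
//    FADD_MERGE_OP1(Pg, Op1, Op2)             inactive lanes equal Op1
//    FNEG_MERGE_PASSTHRU(Pg, Op1, Passthru)   inactive lanes equal Passthru
//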
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMAX_PRED,
  FMIN_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left, right, falsecc, cc, flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Unsigned Add Long Pairwise
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type but differing in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-byte quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}

} // end anonymous namespace

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
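
// For example (illustrative only), with the FPCR value read into an unsigned
// integer named Fpcr, the current rounding mode can be recovered as:
//
//   Rounding RMode = static_cast<Rounding>((Fpcr >> RoundingBitsPos) & rmMask);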
} // namespace AArch64

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64 bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(const SDValue &AddNode,
                                   const SDValue &ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if the NoImplicitFloat
    // attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
    const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  // Normally SVE is only used for fixed-length vectors that do not fit within
  // a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
                                             LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif